diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 9c59284a5..23efe34d8 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -448,15 +448,15 @@ dependencies = [ [[package]] name = "tokenizers" -version = "0.0.7" +version = "0.0.8" dependencies = [ "pyo3 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokenizers-lib 0.0.7", + "tokenizers-lib 0.0.8", ] [[package]] name = "tokenizers-lib" -version = "0.0.7" +version = "0.0.8" dependencies = [ "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 1ab24b930..6315f5069 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tokenizers" -version = "0.0.7" +version = "0.0.8" authors = ["Anthony MOI "] edition = "2018" diff --git a/bindings/python/README.md b/bindings/python/README.md index ca6e959eb..f2d05b948 100644 --- a/bindings/python/README.md +++ b/bindings/python/README.md @@ -61,7 +61,7 @@ bpe = models.BPE.from_files(vocab, merges) tokenizer = Tokenizer(bpe) # Customize pre-tokenization and decoding -tokenizer.with_pre_tokenizer(pre_tokenizers.ByteLevel.new()) +tokenizer.with_pre_tokenizer(pre_tokenizers.ByteLevel.new(True)) tokenizer.with_decoder(decoders.ByteLevel.new()) # And then encode: @@ -85,7 +85,7 @@ from tokenizers import Tokenizer, models, pre_tokenizers, decoders, trainers tokenizer = Tokenizer(models.BPE.empty()) # Customize pre-tokenization and decoding -tokenizer.with_pre_tokenizer(pre_tokenizers.ByteLevel.new()) +tokenizer.with_pre_tokenizer(pre_tokenizers.ByteLevel.new(True)) tokenizer.with_decoder(decoders.ByteLevel.new()) # And then train diff --git a/bindings/python/setup.py b/bindings/python/setup.py index ff44c1147..7eae8b9d2 100644 --- a/bindings/python/setup.py +++ b/bindings/python/setup.py @@ -3,7 +3,7 @@ setup( name="tokenizers", - version="0.0.7", + version="0.0.8", description="Fast and Customizable Tokenizers", long_description=open("README.md", "r", encoding="utf-8").read(), long_description_content_type="text/markdown", diff --git a/tokenizers/Cargo.lock b/tokenizers/Cargo.lock index 0e896fb55..eb997ab5b 100644 --- a/tokenizers/Cargo.lock +++ b/tokenizers/Cargo.lock @@ -265,7 +265,7 @@ dependencies = [ [[package]] name = "tokenizers-lib" -version = "0.0.7" +version = "0.0.8" dependencies = [ "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/tokenizers/Cargo.toml b/tokenizers/Cargo.toml index 897bf387d..076525c69 100644 --- a/tokenizers/Cargo.toml +++ b/tokenizers/Cargo.toml @@ -2,7 +2,7 @@ authors = ["Anthony MOI "] edition = "2018" name = "tokenizers-lib" -version = "0.0.7" +version = "0.0.8" [[bin]] name = "cli" path = "src/cli.rs"