Skip to content

Commit 6e9b033

Browse files
authored
Merge branch 'PyThaiNLP:main' into fix-test-main-lib-workflow
2 parents 85b67ff + 2a67218 commit 6e9b033

File tree

1 file changed

+16
-2
lines changed

1 file changed

+16
-2
lines changed

src/four_bytes_str/custom_regex.rs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
/**
55
* Regex for a custom four-byte string.
6-
*
6+
*
77
* This is a result of an attempt to create a formatter
88
* which translates normal, human-readable Thai regex
99
* into a 4-byte, zero-left-padded byte regex pattern string
@@ -15,6 +15,7 @@ use regex_syntax::{
1515
is_meta_character, Parser,
1616
};
1717
use std::{error::Error, fmt::Display};
18+
1819
trait ToCustomStringRepr {
1920
fn to_custom_byte_repr(&self) -> Result<String>;
2021
}
@@ -28,6 +29,7 @@ enum UnsupportedCustomRegexParserError {
2829
AnchorStartLine,
2930
AnchorEndLine,
3031
}
32+
3133
enum IterableHirKind {
3234
Alternation(Vec<Hir>),
3335
Concat(Vec<Hir>),
@@ -56,13 +58,15 @@ impl Display for UnsupportedCustomRegexParserError {
5658
}
5759
}
5860
}
61+
5962
impl Error for UnsupportedCustomRegexParserError {}
6063

6164
impl ToCustomStringRepr for Hir {
6265
fn to_custom_byte_repr(&self) -> Result<String> {
6366
self.kind().to_custom_byte_repr()
6467
}
6568
}
69+
6670
impl ToCustomStringRepr for HirKind {
6771
fn to_custom_byte_repr(&self) -> Result<String> {
6872
match self {
@@ -80,6 +84,7 @@ impl ToCustomStringRepr for HirKind {
8084
}
8185
}
8286
}
87+
8388
impl ToCustomStringRepr for Anchor {
8489
fn to_custom_byte_repr(&self) -> Result<String> {
8590
match self {
@@ -90,16 +95,18 @@ impl ToCustomStringRepr for Anchor {
9095
}
9196
}
9297
}
98+
9399
impl ToCustomStringRepr for LiteralEnum {
94100
fn to_custom_byte_repr(&self) -> Result<String> {
95101
match self {
96102
LiteralEnum::Unicode(a) => Ok(a.to_four_byte_string()),
97-
LiteralEnum::Byte(b) => Err(AnyError::new(
103+
LiteralEnum::Byte(_b) => Err(AnyError::new(
98104
UnsupportedCustomRegexParserError::ByteLiteral,
99105
)),
100106
}
101107
}
102108
}
109+
103110
impl ToCustomStringRepr for Class {
104111
fn to_custom_byte_repr(&self) -> Result<String> {
105112
match self {
@@ -108,6 +115,7 @@ impl ToCustomStringRepr for Class {
108115
}
109116
}
110117
}
118+
111119
impl ToCustomStringRepr for Repetition {
112120
fn to_custom_byte_repr(&self) -> Result<String> {
113121
let symbol: Result<String> = match &self.kind {
@@ -143,6 +151,7 @@ impl ToCustomStringRepr for Repetition {
143151
}
144152
}
145153
}
154+
146155
impl ToCustomStringRepr for IterableHirKind {
147156
fn to_custom_byte_repr(&self) -> Result<String> {
148157
match self {
@@ -251,6 +260,7 @@ impl ToCustomStringRepr for IterableHirKind {
251260
}
252261
}
253262
}
263+
254264
impl ToCustomStringRepr for Group {
255265
fn to_custom_byte_repr(&self) -> Result<String> {
256266
let recur = match self.hir.kind() {
@@ -288,6 +298,7 @@ enum UTFBytesLength {
288298
Three,
289299
Four,
290300
}
301+
291302
fn char_class(character: char) -> UTFBytesLength {
292303
let mut bytes_buffer: [u8; 4] = [0; 4];
293304

@@ -307,6 +318,7 @@ fn is_in_range<T: PartialEq + PartialOrd>(value: T, range: (T, T)) -> bool {
307318
trait PadLeftZeroFourBytesRep {
308319
fn to_four_byte_string(&self) -> String;
309320
}
321+
310322
fn escape_meta_character(c: char) -> String {
311323
if is_meta_character(c) {
312324
format!(r"\{}", c)
@@ -316,6 +328,7 @@ fn escape_meta_character(c: char) -> String {
316328
c.to_string()
317329
}
318330
}
331+
319332
impl PadLeftZeroFourBytesRep for &[ClassUnicodeRange] {
320333
fn to_four_byte_string(&self) -> String {
321334
let urange = self;
@@ -363,6 +376,7 @@ impl PadLeftZeroFourBytesRep for &[ClassUnicodeRange] {
363376
}
364377
}
365378
}
379+
366380
impl PadLeftZeroFourBytesRep for char {
367381
fn to_four_byte_string(&self) -> String {
368382
let character = self;

0 commit comments

Comments
 (0)