Skip to content

Commit

Permalink
Merge branch 'PyThaiNLP:main' into fix-test-main-lib-workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
bact authored Nov 10, 2024
2 parents 85b67ff + 2a67218 commit 6e9b033
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions src/four_bytes_str/custom_regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

/**
* Regex for a custom four-byte string.
*
*
* This is the result of an attempt to create a formatter
* which translates a normal, human-readable Thai regex
* into a 4-byte, zero-left-padded bytes regex pattern string
Expand All @@ -15,6 +15,7 @@ use regex_syntax::{
is_meta_character, Parser,
};
use std::{error::Error, fmt::Display};

/// Conversion of a regex syntax element into its custom string representation.
///
/// Implemented in this file for `Hir`, `HirKind`, `Anchor`, `LiteralEnum`,
/// `Class`, `Repetition`, `IterableHirKind` and `Group`.
trait ToCustomStringRepr {
/// Returns the custom byte-representation string for `self`, or an error
/// when the element cannot be represented (for example, the `LiteralEnum`
/// impl rejects byte literals with
/// `UnsupportedCustomRegexParserError::ByteLiteral`).
fn to_custom_byte_repr(&self) -> Result<String>;
}
Expand All @@ -28,6 +29,7 @@ enum UnsupportedCustomRegexParserError {
AnchorStartLine,
AnchorEndLine,
}

enum IterableHirKind {
Alternation(Vec<Hir>),
Concat(Vec<Hir>),
Expand Down Expand Up @@ -56,13 +58,15 @@ impl Display for UnsupportedCustomRegexParserError {
}
}
}

// Marks the parser error enum as a standard error type. No methods are
// overridden, so the defaults provided by `std::error::Error` apply.
impl Error for UnsupportedCustomRegexParserError {}

impl ToCustomStringRepr for Hir {
    /// Delegates to the `ToCustomStringRepr` implementation of this node's
    /// `HirKind`, returning whatever that conversion produces.
    fn to_custom_byte_repr(&self) -> Result<String> {
        let kind = self.kind();
        kind.to_custom_byte_repr()
    }
}

impl ToCustomStringRepr for HirKind {
fn to_custom_byte_repr(&self) -> Result<String> {
match self {
Expand All @@ -80,6 +84,7 @@ impl ToCustomStringRepr for HirKind {
}
}
}

impl ToCustomStringRepr for Anchor {
fn to_custom_byte_repr(&self) -> Result<String> {
match self {
Expand All @@ -90,16 +95,18 @@ impl ToCustomStringRepr for Anchor {
}
}
}

impl ToCustomStringRepr for LiteralEnum {
fn to_custom_byte_repr(&self) -> Result<String> {
match self {
LiteralEnum::Unicode(a) => Ok(a.to_four_byte_string()),
LiteralEnum::Byte(b) => Err(AnyError::new(
LiteralEnum::Byte(_b) => Err(AnyError::new(
UnsupportedCustomRegexParserError::ByteLiteral,
)),
}
}
}

impl ToCustomStringRepr for Class {
fn to_custom_byte_repr(&self) -> Result<String> {
match self {
Expand All @@ -108,6 +115,7 @@ impl ToCustomStringRepr for Class {
}
}
}

impl ToCustomStringRepr for Repetition {
fn to_custom_byte_repr(&self) -> Result<String> {
let symbol: Result<String> = match &self.kind {
Expand Down Expand Up @@ -143,6 +151,7 @@ impl ToCustomStringRepr for Repetition {
}
}
}

impl ToCustomStringRepr for IterableHirKind {
fn to_custom_byte_repr(&self) -> Result<String> {
match self {
Expand Down Expand Up @@ -251,6 +260,7 @@ impl ToCustomStringRepr for IterableHirKind {
}
}
}

impl ToCustomStringRepr for Group {
fn to_custom_byte_repr(&self) -> Result<String> {
let recur = match self.hir.kind() {
Expand Down Expand Up @@ -288,6 +298,7 @@ enum UTFBytesLength {
Three,
Four,
}

fn char_class(character: char) -> UTFBytesLength {
let mut bytes_buffer: [u8; 4] = [0; 4];

Expand All @@ -307,6 +318,7 @@ fn is_in_range<T: PartialEq + PartialOrd>(value: T, range: (T, T)) -> bool {
/// Rendering of a value as a four-byte string.
///
/// Implemented in this file for `char` and `&[ClassUnicodeRange]`.
trait PadLeftZeroFourBytesRep {
/// Returns the four-byte string form of `self`.
// NOTE(review): judging by the trait name and the file header, each character
// is presumably left-padded with zeros to four bytes — confirm against the impls.
fn to_four_byte_string(&self) -> String;
}

fn escape_meta_character(c: char) -> String {
if is_meta_character(c) {
format!(r"\{}", c)
Expand All @@ -316,6 +328,7 @@ fn escape_meta_character(c: char) -> String {
c.to_string()
}
}

impl PadLeftZeroFourBytesRep for &[ClassUnicodeRange] {
fn to_four_byte_string(&self) -> String {
let urange = self;
Expand Down Expand Up @@ -363,6 +376,7 @@ impl PadLeftZeroFourBytesRep for &[ClassUnicodeRange] {
}
}
}

impl PadLeftZeroFourBytesRep for char {
fn to_four_byte_string(&self) -> String {
let character = self;
Expand Down

0 comments on commit 6e9b033

Please sign in to comment.