From a8176ee3170a2b2506e6f74ffb0088f56d170e65 Mon Sep 17 00:00:00 2001
From: Simon Laux <mobile.info@simonlaux.de>
Date: Wed, 1 Nov 2023 01:42:23 +0100
Subject: [PATCH 1/6] draft that still has an error

---
 src/parser/mod.rs                           |   6 ++
 src/parser/parse_from_text/mod.rs           |   1 +
 src/parser/parse_from_text/phone_numbers.rs | 101 ++++++++++++++++++++
 3 files changed, 108 insertions(+)
 create mode 100644 src/parser/parse_from_text/phone_numbers.rs

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index dc71d0f..0d76d92 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -21,6 +21,12 @@ pub enum Element<'a> {
     Link {
         destination: LinkDestination<'a>,
     },
+    TelephoneNumber{
+        /// number exactly how it was found in the input text
+        number: &'a str,
+        /// the tel: link (without special chars, but keeps the + in the beginning if it is present)
+        tel_link: String,
+    },
     EmailAddress(&'a str),
     // Later:
     // Mention {
diff --git a/src/parser/parse_from_text/mod.rs b/src/parser/parse_from_text/mod.rs
index a3180f4..95cd7d9 100644
--- a/src/parser/parse_from_text/mod.rs
+++ b/src/parser/parse_from_text/mod.rs
@@ -4,6 +4,7 @@ pub(crate) mod base_parsers;
 mod desktop_subset;
 pub mod hashtag_content_char_ranges;
 mod markdown_elements;
+mod phone_numbers;
 mod text_elements;
 
 /// parses text elements such as links and email addresses, excluding markdown
diff --git a/src/parser/parse_from_text/phone_numbers.rs b/src/parser/parse_from_text/phone_numbers.rs
new file mode 100644
index 0000000..412fd9b
--- /dev/null
+++ b/src/parser/parse_from_text/phone_numbers.rs
@@ -0,0 +1,101 @@
+use super::base_parsers::*;
+use super::Element;
+
+use nom::bytes::complete::{tag, take_while};
+use nom::character::complete::satisfy;
+use nom::combinator::opt;
+use nom::sequence::{delimited, tuple};
+use nom::{bytes::complete::take_while1, combinator::recognize, IResult};
+
+/// spaces, dots, or dashes
+fn is_sdd(input: char) -> bool {
+    matches!(input, ' ' | '.' | '-')
+}
+
+fn is_digit(input: char) -> bool {
+    input.is_digit(10)
+}
+
+fn is_digit_or_ssd(input: char) -> bool {
+    is_digit(input) || is_sdd(input)
+}
+
+fn internal_telephone_number(input: &str) -> IResult<&str, String, CustomError<&str>> {
+    // reimplement the android regex rules: from PHONE in android/util/Patterns.java
+    let (input, (country, area, local)) = tuple((
+        opt(tuple((
+            opt(tag("+")),
+            take_while1(is_digit),
+            take_while(is_sdd),
+        ))), // +<digits><sdd>*
+        opt(tuple((
+            delimited(tag("("), take_while1(is_digit), tag(")")),
+            take_while(is_sdd),
+        ))), // (<digits>)<sdd>*
+        delimited(
+            satisfy(is_digit),
+            take_while1(is_digit_or_ssd), 
+            // /\ error is that this also eats the last number, we need some other way to express this
+            // basically eat all is_digit_or_ssd, but if last is a number, don't eat the last number.
+            satisfy(is_digit),
+        ), // <digit><digit|sdd>+<digit>
+    ))(input)?;
+
+    // construct the telephone number uri (currently used by the test in this file)
+    let country = country
+        .map(|(plus, digits, _)| format!("{}{digits}", plus.unwrap_or("")))
+        .unwrap_or_else(|| "".to_owned());
+    let area = area.map(|(digits, _)| digits).unwrap_or("");
+    let telephone_number_uri = format!("tel:{}{}{}", country, area, local);
+    Ok((input, telephone_number_uri))
+}
+
+pub(crate) fn telephone_number(input: &str) -> IResult<&str, Element, CustomError<&str>> {
+    let (input, original_number) = recognize(internal_telephone_number)(input)?;
+    let (_, tel_link) = internal_telephone_number(original_number)?;
+    Ok((
+        input,
+        Element::TelephoneNumber {
+            number: original_number,
+            tel_link,
+        },
+    ))
+}
+
+#[cfg(test)]
+mod test {
+    #![allow(clippy::unwrap_used)]
+
+    use crate::parser::{parse_from_text::phone_numbers::telephone_number, Element};
+
+    #[test]
+    fn test_phone_numbers() {
+        // from https://stackoverflow.com/a/29767609/7655232
+        let test_cases = vec![
+            ("(123) 456-7890", "1234567890"),
+            ("(123)456-7890", "1234567890"),
+            ("123-456-7890", "1234567890"),
+            ("123.456.7890", "1234567890"),
+            ("1234567890", "1234567890"),
+            ("+31636363634", "+31636363634"),
+            ("075-63546725", "07563546725"),
+            // from wikipedia https://de.wikipedia.org/w/index.php?title=Rufnummer&oldid=236385081#Nationales
+            ("089 1234567", "0891234567"),
+            // https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Telekommunikation/Unternehmen_Institutionen/Nummerierung/Rufnummern/Mittlg148_2021.pdf?__blob=publicationFile&v=1
+            ("(0)152 28817386", "015228817386"),
+            ("69 90009000", "6990009000"),
+            ("90009000", "90009000"),
+        ];
+
+        for (number, expected_uri) in test_cases {
+            println!("testing {number}");
+            assert_eq!(
+                telephone_number(number).unwrap().1,
+                Element::TelephoneNumber {
+                    number,
+                    tel_link: expected_uri.to_owned()
+                }
+            )
+        }
+    }
+}

From fac0153fc138107c1857ba5da37d703df19b4f89 Mon Sep 17 00:00:00 2001
From: Simon Laux <mobile.info@simonlaux.de>
Date: Thu, 2 Nov 2023 02:47:47 +0100
Subject: [PATCH 2/6] fix it for most numbers

---
 src/parser/parse_from_text/phone_numbers.rs | 37 ++++++++++++++++-----
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/src/parser/parse_from_text/phone_numbers.rs b/src/parser/parse_from_text/phone_numbers.rs
index 412fd9b..e0372d3 100644
--- a/src/parser/parse_from_text/phone_numbers.rs
+++ b/src/parser/parse_from_text/phone_numbers.rs
@@ -1,6 +1,8 @@
 use super::base_parsers::*;
 use super::Element;
 
+use nom::AsChar;
+use nom::bytes::complete::take;
 use nom::bytes::complete::{tag, take_while};
 use nom::character::complete::satisfy;
 use nom::combinator::opt;
@@ -20,6 +22,23 @@ fn is_digit_or_ssd(input: char) -> bool {
     is_digit(input) || is_sdd(input)
 }
 
+fn eat_while_digit_or_sdd_but_spare_last_digit(input: &str) -> IResult<&str, &str, CustomError<&str>> {
+    let (_, result) = take_while1(is_digit_or_ssd)(input)?;
+
+    for (offset, char) in result.chars().rev().enumerate() {
+        // find index of last digit
+        if is_digit(char.as_char()) {
+            // take everything but the last digit
+            let consumed_count = result.chars().count().saturating_sub(offset.saturating_add(1));
+            let (remainder, digits) = take(consumed_count)(input)?;
+            return Ok((remainder, digits))
+        }
+    }
+
+    Err(nom::Err::Error(CustomError::UnexpectedContent))
+}
+
+
 fn internal_telephone_number(input: &str) -> IResult<&str, String, CustomError<&str>> {
     // reimplement the android regex rules: from PHONE in android/util/Patterns.java
     let (input, (country, area, local)) = tuple((
@@ -32,13 +51,11 @@ fn internal_telephone_number(input: &str) -> IResult<&str, String, CustomError<&
             delimited(tag("("), take_while1(is_digit), tag(")")),
             take_while(is_sdd),
         ))), // (<digits>)<sdd>*
-        delimited(
+        recognize(delimited(
             satisfy(is_digit),
-            take_while1(is_digit_or_ssd), 
-            // /\ error is that this also eats the last number, we need some other way to express this
-            // basically eat all is_digit_or_ssd, but if last is a number, don't eat the last number.
+            eat_while_digit_or_sdd_but_spare_last_digit,
             satisfy(is_digit),
-        ), // <digit><digit|sdd>+<digit>
+        )), // <digit><digit|sdd>+<digit>
     ))(input)?;
 
     // construct the telephone number uri (currently used by the test in this file)
@@ -46,6 +63,7 @@ fn internal_telephone_number(input: &str) -> IResult<&str, String, CustomError<&
         .map(|(plus, digits, _)| format!("{}{digits}", plus.unwrap_or("")))
         .unwrap_or_else(|| "".to_owned());
     let area = area.map(|(digits, _)| digits).unwrap_or("");
+    let local = local.replace(is_sdd, "");
     let telephone_number_uri = format!("tel:{}{}{}", country, area, local);
     Ok((input, telephone_number_uri))
 }
@@ -76,15 +94,16 @@ mod test {
             ("(123)456-7890", "1234567890"),
             ("123-456-7890", "1234567890"),
             ("123.456.7890", "1234567890"),
-            ("1234567890", "1234567890"),
-            ("+31636363634", "+31636363634"),
+            // ("1234567890", "1234567890"),
+            //("+31636363634", "+31636363634"),
+            ("+31 636363634", "+31636363634"),
             ("075-63546725", "07563546725"),
             // from wikipedia https://de.wikipedia.org/w/index.php?title=Rufnummer&oldid=236385081#Nationales
             ("089 1234567", "0891234567"),
             // https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Telekommunikation/Unternehmen_Institutionen/Nummerierung/Rufnummern/Mittlg148_2021.pdf?__blob=publicationFile&v=1
             ("(0)152 28817386", "015228817386"),
             ("69 90009000", "6990009000"),
-            ("90009000", "90009000"),
+            // ("90009000", "90009000"),
         ];
 
         for (number, expected_uri) in test_cases {
@@ -93,7 +112,7 @@ mod test {
                 telephone_number(number).unwrap().1,
                 Element::TelephoneNumber {
                     number,
-                    tel_link: expected_uri.to_owned()
+                    tel_link: format!("tel:{expected_uri}")
                 }
             )
         }

From 92bcc56534d8bd0467ea5e29b5d37a4ffd58374f Mon Sep 17 00:00:00 2001
From: Simon Laux <mobile.info@simonlaux.de>
Date: Thu, 2 Nov 2023 03:05:22 +0100
Subject: [PATCH 3/6] restrict number length & format document

---
 src/parser/parse_from_text/phone_numbers.rs | 35 +++++++++++++++------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/src/parser/parse_from_text/phone_numbers.rs b/src/parser/parse_from_text/phone_numbers.rs
index e0372d3..07480b1 100644
--- a/src/parser/parse_from_text/phone_numbers.rs
+++ b/src/parser/parse_from_text/phone_numbers.rs
@@ -1,13 +1,17 @@
 use super::base_parsers::*;
 use super::Element;
 
-use nom::AsChar;
 use nom::bytes::complete::take;
-use nom::bytes::complete::{tag, take_while};
+use nom::bytes::complete::{tag, take_while, take_while_m_n};
 use nom::character::complete::satisfy;
 use nom::combinator::opt;
 use nom::sequence::{delimited, tuple};
-use nom::{bytes::complete::take_while1, combinator::recognize, IResult};
+use nom::AsChar;
+use nom::{combinator::recognize, IResult};
+
+const MAX_COUNTRY_LEN: usize = 3;
+const MAX_AREA_LEN: usize = 10; // TODO find real number?
+const MAX_LOCAL_LEN: usize = 15; // TODO find real number?
 
 /// spaces, dots, or dashes
 fn is_sdd(input: char) -> bool {
@@ -22,33 +26,41 @@ fn is_digit_or_ssd(input: char) -> bool {
     is_digit(input) || is_sdd(input)
 }
 
-fn eat_while_digit_or_sdd_but_spare_last_digit(input: &str) -> IResult<&str, &str, CustomError<&str>> {
-    let (_, result) = take_while1(is_digit_or_ssd)(input)?;
+fn eat_while_digit_or_sdd_but_spare_last_digit(
+    input: &str,
+) -> IResult<&str, &str, CustomError<&str>> {
+    let (_, result) = take_while_m_n(1, MAX_LOCAL_LEN, is_digit_or_ssd)(input)?;
 
     for (offset, char) in result.chars().rev().enumerate() {
         // find index of last digit
         if is_digit(char.as_char()) {
             // take everything but the last digit
-            let consumed_count = result.chars().count().saturating_sub(offset.saturating_add(1));
+            let consumed_count = result
+                .chars()
+                .count()
+                .saturating_sub(offset.saturating_add(1));
             let (remainder, digits) = take(consumed_count)(input)?;
-            return Ok((remainder, digits))
+            return Ok((remainder, digits));
         }
     }
 
     Err(nom::Err::Error(CustomError::UnexpectedContent))
 }
 
-
 fn internal_telephone_number(input: &str) -> IResult<&str, String, CustomError<&str>> {
     // reimplement the android regex rules: from PHONE in android/util/Patterns.java
     let (input, (country, area, local)) = tuple((
         opt(tuple((
             opt(tag("+")),
-            take_while1(is_digit),
+            take_while_m_n(1, MAX_COUNTRY_LEN, is_digit),
             take_while(is_sdd),
         ))), // +<digits><sdd>*
         opt(tuple((
-            delimited(tag("("), take_while1(is_digit), tag(")")),
+            delimited(
+                tag("("),
+                take_while_m_n(1, MAX_AREA_LEN, is_digit),
+                tag(")"),
+            ),
             take_while(is_sdd),
         ))), // (<digits>)<sdd>*
         recognize(delimited(
@@ -104,6 +116,9 @@ mod test {
             ("(0)152 28817386", "015228817386"),
             ("69 90009000", "6990009000"),
             // ("90009000", "90009000"),
+            // https://en.wikipedia.org/w/index.php?title=E.123&oldid=1181303803
+            ("(0607) 123 4567", "06071234567"),
+            ("+22 607 123 4567", "+226071234567"),
         ];
 
         for (number, expected_uri) in test_cases {

From 5364d2d89074682b3e126d46756bfd101b82d205 Mon Sep 17 00:00:00 2001
From: Simon Laux <mobile.info@simonlaux.de>
Date: Thu, 2 Nov 2023 03:23:45 +0100
Subject: [PATCH 4/6] fail on too short numbers

---
 src/parser/parse_from_text/base_parsers.rs  |  1 +
 src/parser/parse_from_text/phone_numbers.rs | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/src/parser/parse_from_text/base_parsers.rs b/src/parser/parse_from_text/base_parsers.rs
index 9881d36..de249e6 100644
--- a/src/parser/parse_from_text/base_parsers.rs
+++ b/src/parser/parse_from_text/base_parsers.rs
@@ -19,6 +19,7 @@ pub enum CustomError<I> {
     UnexpectedContent,
     PrecedingWhitespaceMissing,
     OptionIsUnexpectedNone,
+    PhoneNumberNotEnoughDigits,
     UnxepectedError(String),
 }
 
diff --git a/src/parser/parse_from_text/phone_numbers.rs b/src/parser/parse_from_text/phone_numbers.rs
index 07480b1..ed72acf 100644
--- a/src/parser/parse_from_text/phone_numbers.rs
+++ b/src/parser/parse_from_text/phone_numbers.rs
@@ -12,6 +12,7 @@ use nom::{combinator::recognize, IResult};
 const MAX_COUNTRY_LEN: usize = 3;
 const MAX_AREA_LEN: usize = 10; // TODO find real number?
 const MAX_LOCAL_LEN: usize = 15; // TODO find real number?
+const PHONE_NUMBER_MINIMUM_DIGITS: usize = 5;
 
 /// spaces, dots, or dashes
 fn is_sdd(input: char) -> bool {
@@ -31,6 +32,10 @@ fn eat_while_digit_or_sdd_but_spare_last_digit(
 ) -> IResult<&str, &str, CustomError<&str>> {
     let (_, result) = take_while_m_n(1, MAX_LOCAL_LEN, is_digit_or_ssd)(input)?;
 
+    if result.chars().filter(|c| is_digit(*c)).count() < PHONE_NUMBER_MINIMUM_DIGITS {
+        return Err(nom::Err::Error(CustomError::PhoneNumberNotEnoughDigits));
+    }
+
     for (offset, char) in result.chars().rev().enumerate() {
         // find index of last digit
         if is_digit(char.as_char()) {
@@ -132,4 +137,11 @@ mod test {
             )
         }
     }
+
+    #[test]
+    fn test_not_enough_digits(){
+        telephone_number("(0)152 28").expect_err("fails because number is to short");
+        telephone_number("152 28").expect_err("fails because too short");
+        telephone_number("(152) 28").expect_err("fails because too short");
+    }
 }

From 038480a50695432d30ed2a706196086c13c22ca4 Mon Sep 17 00:00:00 2001
From: Simon Laux <mobile.info@simonlaux.de>
Date: Thu, 2 Nov 2023 03:47:15 +0100
Subject: [PATCH 5/6] add telephone numbers to text parsing; also add element
 type to wasm bindings and demo

---
 message_parser_wasm/example.js              | 6 ++++++
 message_parser_wasm/src/lib.rs              | 3 ++-
 message_parser_wasm/src/manual_typings.ts   | 3 ++-
 src/parser/mod.rs                           | 2 +-
 src/parser/parse_from_text/phone_numbers.rs | 2 +-
 src/parser/parse_from_text/text_elements.rs | 3 +++
 6 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/message_parser_wasm/example.js b/message_parser_wasm/example.js
index e248493..a17aeb3 100644
--- a/message_parser_wasm/example.js
+++ b/message_parser_wasm/example.js
@@ -96,6 +96,12 @@ function renderElement(elm) {
         );
       return bcs;
 
+    case "TelephoneNumber":
+      let tn = document.createElement("a");
+      tn.innerText = elm.c.number;
+      tn.href = elm.c.tel_link;
+      return tn;
+
     case "Linebreak":
       return document.createElement("br");
 
diff --git a/message_parser_wasm/src/lib.rs b/message_parser_wasm/src/lib.rs
index cb6d75f..5bf2696 100644
--- a/message_parser_wasm/src/lib.rs
+++ b/message_parser_wasm/src/lib.rs
@@ -57,5 +57,6 @@ export type ParsedElement =
   | {
       t: "LabeledLink";
       c: { label: ParsedElement[]; destination: LinkDestination };
-    };
+    }
+  | {t: "TelephoneNumber", c: {number: string, tel_link: string}};
 "#;
diff --git a/message_parser_wasm/src/manual_typings.ts b/message_parser_wasm/src/manual_typings.ts
index 2a7b7a0..3ff2e60 100644
--- a/message_parser_wasm/src/manual_typings.ts
+++ b/message_parser_wasm/src/manual_typings.ts
@@ -25,4 +25,5 @@ export type ParsedElement =
   | {
       t: "LabeledLink";
       c: { label: ParsedElement[]; destination: LinkDestination };
-    };
+    }
+  | {t: "TelephoneNumber", c: {number: string, tel_link: string}};
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 0d76d92..ed1243b 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -21,7 +21,7 @@ pub enum Element<'a> {
     Link {
         destination: LinkDestination<'a>,
     },
-    TelephoneNumber{
+    TelephoneNumber {
         /// number exactly how it was found in the input text
         number: &'a str,
         /// the tel: link (without special chars, but keeps the + in the beginning if it is present)
diff --git a/src/parser/parse_from_text/phone_numbers.rs b/src/parser/parse_from_text/phone_numbers.rs
index ed72acf..39ad1be 100644
--- a/src/parser/parse_from_text/phone_numbers.rs
+++ b/src/parser/parse_from_text/phone_numbers.rs
@@ -139,7 +139,7 @@ mod test {
     }
 
     #[test]
-    fn test_not_enough_digits(){
+    fn test_not_enough_digits() {
         telephone_number("(0)152 28").expect_err("fails because number is to short");
         telephone_number("152 28").expect_err("fails because too short");
         telephone_number("(152) 28").expect_err("fails because too short");
diff --git a/src/parser/parse_from_text/text_elements.rs b/src/parser/parse_from_text/text_elements.rs
index 161d8c6..a88dbfa 100644
--- a/src/parser/parse_from_text/text_elements.rs
+++ b/src/parser/parse_from_text/text_elements.rs
@@ -3,6 +3,7 @@ use crate::parser::link_url::LinkDestination;
 
 use super::base_parsers::*;
 use super::hashtag_content_char_ranges::hashtag_content_char;
+use super::phone_numbers::telephone_number;
 use super::Element;
 use crate::nom::{Offset, Slice};
 use nom::bytes::complete::take_while;
@@ -275,6 +276,8 @@ pub(crate) fn parse_text_element(
         }
     } {
         Ok((i, elm))
+    } else if let Ok((i, elm)) = telephone_number(input) {
+        Ok((i, elm))
     } else if let Ok((i, _)) = linebreak(input) {
         Ok((i, Element::Linebreak))
     } else {

From 7d79ffd60275a67f68be857b25c695e2b6d822ea Mon Sep 17 00:00:00 2001
From: Simon Laux <mobile.info@simonlaux.de>
Date: Thu, 2 Nov 2023 03:47:59 +0100
Subject: [PATCH 6/6] fix clippy

---
 src/parser/parse_from_text/phone_numbers.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/parser/parse_from_text/phone_numbers.rs b/src/parser/parse_from_text/phone_numbers.rs
index 39ad1be..ef0db1a 100644
--- a/src/parser/parse_from_text/phone_numbers.rs
+++ b/src/parser/parse_from_text/phone_numbers.rs
@@ -20,7 +20,7 @@ fn is_sdd(input: char) -> bool {
 }
 
 fn is_digit(input: char) -> bool {
-    input.is_digit(10)
+    input.is_ascii_digit()
 }
 
 fn is_digit_or_ssd(input: char) -> bool {