From 0b6846996b0f3e9a9207c059ac4137a1095322be Mon Sep 17 00:00:00 2001 From: Eugene Gershnik Date: Tue, 7 Jan 2025 10:15:10 -0800 Subject: [PATCH] Proper view interface for utf_view and grapheme_view --- CHANGELOG.md | 2 +- lib/inc/sys_string/grapheme_view.h | 12 ++++-- lib/inc/sys_string/utf_view.h | 11 +++++- test/test_grapheme.cpp | 25 ++++++++++++ test/test_utf_iteration.cpp | 61 ++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72ac66d..961a765 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Added -- `grapheme_view` which provides ability to iterate over grapheme clusters in `sys_string` and any UTF range. +- `grapheme_view` and `graphemes` adapter which provide ability to iterate over grapheme clusters in `sys_string` and any UTF range. ### Changed diff --git a/lib/inc/sys_string/grapheme_view.h b/lib/inc/sys_string/grapheme_view.h index bab0497..8b28083 100644 --- a/lib/inc/sys_string/grapheme_view.h +++ b/lib/inc/sys_string/grapheme_view.h @@ -114,9 +114,6 @@ namespace sysstr using pointer = typename iterator::pointer; using const_pointer = pointer; - // static const bool borrowed = (ViewType == byref) || - // (ViewType == byval && std::ranges::borrowed_range); - public: grapheme_view(const Range & src) noexcept(noexcept(range(src))) : m_src(src) @@ -150,6 +147,15 @@ namespace sysstr SYS_STRING_FORCE_INLINE std::default_sentinel_t crend() const requires(is_reversible) { return rend(); } + /* True when std::ranges::empty() reports the source range m_src as empty. */ bool empty() const + { return std::ranges::empty(m_src); } + /* Explicit boolean test: the negation of empty(), i.e. true when m_src is not empty. */ explicit operator bool() const + { return !std::ranges::empty(m_src); } + /* First element of the view, obtained by dereferencing begin(). NOTE(review): presumably requires a non-empty view - confirm. */ decltype(auto) front() const + { return *this->begin();} + /* Last element of the view, obtained by dereferencing rbegin(); only available when is_reversible holds. NOTE(review): presumably requires a non-empty view - confirm. */ decltype(auto) back() const requires(is_reversible) + { return *this->rbegin();} + reverse_iterator reverse(iterator it) const requires(is_reversible) { return reverse_iterator(it, std::ranges::rend(m_src)); 
} diff --git a/lib/inc/sys_string/utf_view.h b/lib/inc/sys_string/utf_view.h index 6a755fb..ca0ac9d 100644 --- a/lib/inc/sys_string/utf_view.h +++ b/lib/inc/sys_string/utf_view.h @@ -289,7 +289,7 @@ namespace sysstr SYS_STRING_FORCE_INLINE static auto range_ref(const range * src) -> const range & { return *src; } - static constexpr bool is_reversible = ranges::reverse_traversable_range>; + static constexpr bool is_reversible = ranges::reverse_traversable_range; static constexpr auto source_encoding = utf_encoding_of>; using access_iterator = decltype(std::ranges::begin(range_ref(std::declval()))); @@ -358,6 +358,15 @@ namespace sysstr SYS_STRING_FORCE_INLINE std::default_sentinel_t crend() const requires(is_reversible) { return rend(); } + /* True when std::ranges::empty() reports the source range, reached through range_ref(m_src), as empty. Constrained by std::ranges::forward_range. */ bool empty() const requires(std::ranges::forward_range) + { return std::ranges::empty(range_ref(m_src)); } + /* Explicit boolean test: the negation of empty(). The source is reached through range_ref(m_src), exactly as empty() and reverse() do: range_ref has an overload taking a pointer to the range, so m_src is not necessarily the range object itself. */ explicit operator bool() const requires(std::ranges::forward_range) + { return !std::ranges::empty(range_ref(m_src)); } + /* First element of the view, obtained by dereferencing begin(). NOTE(review): presumably requires a non-empty view - confirm. */ decltype(auto) front() const requires(std::ranges::forward_range) + { return *this->begin();} + /* Last element of the view, obtained by dereferencing rbegin(); only available when is_reversible holds. NOTE(review): presumably requires a non-empty view - confirm. */ decltype(auto) back() const requires(is_reversible) + { return *this->rbegin();} + reverse_iterator reverse(iterator it) const requires(is_reversible) { return reverse_iterator(it, std::ranges::rend(range_ref(m_src))); } diff --git a/test/test_grapheme.cpp b/test/test_grapheme.cpp index a136324..543a854 100644 --- a/test/test_grapheme.cpp +++ b/test/test_grapheme.cpp @@ -149,4 +149,29 @@ TEST_CASE("ranges") { check_graphemes_reverse_range(as_utf32("ab"s), {U"a", U"b"}); } +TEST_CASE("view interface") { + + auto view = graphemes("abc"s); + + CHECK(view); + CHECK(!view.empty()); + auto fr = view.front(); + CHECK(fr.size() == 1); + CHECK(fr.front() == 'a'); + auto bc = view.back(); + CHECK(bc.size() == 1); + CHECK(bc.front() == 'c'); + + CHECK(view.begin() == view.cbegin()); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + CHECK(view.rbegin() == view.crbegin()); + static_assert(std::is_same_v); + 
static_assert(std::is_same_v); + + auto empty_view = graphemes(""s); + CHECK(!empty_view); + CHECK(empty_view.empty()); +} + } diff --git a/test/test_utf_iteration.cpp b/test/test_utf_iteration.cpp index cca5080..1fd8da4 100644 --- a/test/test_utf_iteration.cpp +++ b/test/test_utf_iteration.cpp @@ -750,6 +750,67 @@ TEST_CASE( "Ranges" ) { CHECK(std::ranges::equal(std::vector({u'a', u'b', u'c'}) | as_utf8, std::array{'a', 'b', 'c'})); } +TEST_CASE("view interface") { /* Exercises bool conversion, empty(), front(), back() and the c-prefixed iterator accessors on UTF-32, UTF-16 and UTF-8 views of "abc", plus the empty-source case for each encoding. */ + + { + auto view = as_utf32("abc"s); + + CHECK(view); + CHECK(!view.empty()); + CHECK(view.front() == U'a'); + CHECK(view.back() == U'c'); + + CHECK(view.begin() == view.cbegin()); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + CHECK(view.rbegin() == view.crbegin()); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + + auto empty_view = as_utf32(""s); + CHECK(!empty_view); + CHECK(empty_view.empty()); + } + { + auto view = as_utf16("abc"s); + + CHECK(view); + CHECK(!view.empty()); + CHECK(view.front() == u'a'); + CHECK(view.back() == u'c'); + + CHECK(view.begin() == view.cbegin()); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + CHECK(view.rbegin() == view.crbegin()); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + + auto empty_view = as_utf16(""s); /* same encoding as `view` in this section, so the UTF-16 empty case is actually covered */ + CHECK(!empty_view); + CHECK(empty_view.empty()); + } + { + auto view = as_utf8("abc"s); + + CHECK(view); + CHECK(!view.empty()); + CHECK(view.front() == 'a'); + CHECK(view.back() == 'c'); + + CHECK(view.begin() == view.cbegin()); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + CHECK(view.rbegin() == view.crbegin()); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + + auto empty_view = as_utf8(""s); /* same encoding as `view` in this section, so the UTF-8 empty case is actually covered */ + CHECK(!empty_view); + CHECK(empty_view.empty()); + } +} + #endif }