diff --git a/include/boost/url/detail/config.hpp b/include/boost/url/detail/config.hpp index 6aa27c998..35e99f844 100644 --- a/include/boost/url/detail/config.hpp +++ b/include/boost/url/detail/config.hpp @@ -106,9 +106,9 @@ // Limit tests #ifndef BOOST_URL_MAX_SIZE -// we leave room for a null, -// and still fit in size_t -#define BOOST_URL_MAX_SIZE ((std::size_t(-1))-1) +// leave room for a null terminator and +// fit within url_impl's 32-bit offsets +#define BOOST_URL_MAX_SIZE ((std::size_t)UINT32_MAX - 1) #endif // noinline attribute diff --git a/include/boost/url/detail/url_impl.hpp b/include/boost/url/detail/url_impl.hpp index 5909de785..86c4a2b10 100644 --- a/include/boost/url/detail/url_impl.hpp +++ b/include/boost/url/detail/url_impl.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #include namespace boost { @@ -36,6 +37,12 @@ constexpr char const* const empty_c_str_ = ""; // to by cs_. struct BOOST_URL_DECL url_impl : parts_base { + using size_type = std::uint32_t; + + static_assert( + BOOST_URL_MAX_SIZE <= UINT32_MAX, + "BOOST_URL_MAX_SIZE exceeds 32-bit url_impl capacity"); + static constexpr std::size_t const zero_ = 0; @@ -43,10 +50,10 @@ struct BOOST_URL_DECL url_impl : parts_base // never nullptr char const* cs_ = empty_c_str_; - std::size_t offset_[id_end + 1] = {}; - std::size_t decoded_[id_end] = {}; - std::size_t nseg_ = 0; - std::size_t nparam_ = 0; + size_type offset_[id_end + 1] = {}; + size_type decoded_[id_end] = {}; + size_type nseg_ = 0; + size_type nparam_ = 0; unsigned char ip_addr_[16] = {}; // VFALCO don't we need a bool? std::uint16_t port_number_ = 0; @@ -95,6 +102,25 @@ struct BOOST_URL_DECL url_impl : parts_base void apply_frag(pct_string_view) noexcept; }; +// url_impl stores 32-bit sizes; centralize narrowing with checks. +inline +url_impl::size_type +to_size_type(std::size_t n) noexcept +{ + BOOST_ASSERT(n <= BOOST_URL_MAX_SIZE); + BOOST_ASSERT(n <= UINT32_MAX); + return static_cast(n); +} + +inline +url_impl::size_type +to_size_type(std::ptrdiff_t n) noexcept +{ + BOOST_ASSERT(n >= 0); + return to_size_type( + static_cast(n)); +} + //------------------------------------------------ // this allows a path to come from a diff --git a/src/detail/pattern.cpp b/src/detail/pattern.cpp index e334cf381..8c51be694 100644 --- a/src/detail/pattern.cpp +++ b/src/detail/pattern.cpp @@ -166,8 +166,9 @@ apply( char const* dest1 = pct_vformat( user_chars, pctx, fctx); u.impl_.decoded_[parts::id_user] = - pct_string_view(dest, dest1 - dest) - ->decoded_size(); + detail::to_size_type( + pct_string_view(dest, dest1 - dest) + ->decoded_size()); if (has_pass) { char* destp = u.set_password_impl( @@ -177,8 +178,9 @@ apply( dest1 = pct_vformat( password_chars, pctx, fctx); u.impl_.decoded_[parts::id_pass] = - pct_string_view({destp, dest1}) - ->decoded_size() + 1; + detail::to_size_type( + pct_string_view({destp, dest1}) + ->decoded_size() + 1); } } auto dest = u.set_host_impl( @@ -193,8 +195,9 @@ apply( pct_vformat(lhost_chars, pctx, fctx); *dest1++ = ']'; u.impl_.decoded_[parts::id_host] = - pct_string_view(dest - 1, dest1 - dest) - ->decoded_size(); + detail::to_size_type( + pct_string_view(dest - 1, dest1 - dest) + ->decoded_size()); } else { @@ -203,8 +206,9 @@ apply( char const* dest1 = pct_vformat(host_chars, pctx, fctx); u.impl_.decoded_[parts::id_host] = - pct_string_view(dest, dest1 - dest) - ->decoded_size(); + detail::to_size_type( + pct_string_view(dest, dest1 - dest) + ->decoded_size()); } auto uh = u.encoded_host(); auto h = grammar::parse(uh, host_rule).value(); @@ -221,8 +225,9 @@ apply( char const* dest1 = pct_vformat( grammar::digit_chars, pctx, fctx); u.impl_.decoded_[parts::id_port] = - pct_string_view(dest, dest1 - dest) - ->decoded_size() + 1; + detail::to_size_type( + pct_string_view(dest, dest1 - dest) + ->decoded_size() + 1); core::string_view up = {dest - 1, dest1}; auto p = grammar::parse(up, detail::port_part_rule).value(); if (p.has_port) @@ -240,12 +245,14 @@ apply( path_chars, pctx, fctx); pct_string_view npath(dest, dest1 - dest); u.impl_.decoded_[parts::id_path] += - npath.decoded_size(); + detail::to_size_type( + npath.decoded_size()); if (!npath.empty()) { - u.impl_.nseg_ = std::count( - npath.begin() + 1, - npath.end(), '/') + 1; + u.impl_.nseg_ = detail::to_size_type( + std::count( + npath.begin() + 1, + npath.end(), '/') + 1); } // handle edge cases // 1) path is first component and the @@ -309,12 +316,14 @@ apply( query_chars, pctx, fctx); pct_string_view nquery(dest, dest1 - dest); u.impl_.decoded_[parts::id_query] += - nquery.decoded_size() + 1; + detail::to_size_type( + nquery.decoded_size() + 1); if (!nquery.empty()) { - u.impl_.nparam_ = std::count( - nquery.begin(), - nquery.end(), '&') + 1; + u.impl_.nparam_ = detail::to_size_type( + std::count( + nquery.begin(), + nquery.end(), '&') + 1); } } if (has_frag) @@ -328,9 +337,10 @@ apply( auto dest1 = pct_vformat( fragment_chars, pctx, fctx); u.impl_.decoded_[parts::id_frag] += - make_pct_string_view( - core::string_view(dest, dest1 - dest)) - ->decoded_size() + 1; + detail::to_size_type( + make_pct_string_view( + core::string_view(dest, dest1 - dest)) + ->decoded_size() + 1); } } @@ -944,4 +954,3 @@ parse_pattern( } // detail } // urls } // boost - diff --git a/src/detail/url_impl.cpp b/src/detail/url_impl.cpp index 2731bf6fb..3dc371161 100644 --- a/src/detail/url_impl.cpp +++ b/src/detail/url_impl.cpp @@ -52,13 +52,15 @@ apply_userinfo( // userinfo set_size(id_user, user.size()); decoded_[id_user] = - user.decoded_size(); + detail::to_size_type( + user.decoded_size()); if(pass) { set_size(id_pass, pass->size() + 2); decoded_[id_pass] = - pass->decoded_size(); + detail::to_size_type( + pass->decoded_size()); } else { @@ -82,7 +84,8 @@ apply_host( host_type_ = ht; set_size(id_host, s.size()); decoded_[id_host] = - s.decoded_size(); + detail::to_size_type( + s.decoded_size()); std::memcpy( ip_addr_, addr, @@ -137,8 +140,11 @@ apply_path( std::size_t nseg) noexcept { set_size(id_path, s.size()); - decoded_[id_path] = s.decoded_size(); - nseg_ = detail::path_segments(s, nseg); + decoded_[id_path] = + detail::to_size_type( + s.decoded_size()); + nseg_ = detail::to_size_type( + detail::path_segments(s, nseg)); } void @@ -147,9 +153,11 @@ apply_query( pct_string_view s, std::size_t n) noexcept { - nparam_ = n; + nparam_ = detail::to_size_type(n); set_size(id_query, 1 + s.size()); - decoded_[id_query] = s.decoded_size(); + decoded_[id_query] = + detail::to_size_type( + s.decoded_size()); } void @@ -158,7 +166,9 @@ apply_frag( pct_string_view s) noexcept { set_size(id_frag, s.size() + 1); - decoded_[id_frag] = s.decoded_size(); + decoded_[id_frag] = + detail::to_size_type( + s.decoded_size()); } // return length of [first, last) @@ -255,10 +265,19 @@ set_size( int id, std::size_t n) noexcept { - auto d = n - len(id); + auto const cur = len(id); + if(n >= cur) + { + auto const d = n - cur; + for(auto i = id + 1; + i <= id_end; ++i) + offset_[i] += detail::to_size_type(d); + return; + } + auto const d = cur - n; for(auto i = id + 1; i <= id_end; ++i) - offset_[i] += d; + offset_[i] -= detail::to_size_type(d); } // trim id to size n, @@ -271,7 +290,8 @@ split( { BOOST_ASSERT(id < id_end - 1); //BOOST_ASSERT(n <= len(id)); - offset_[id + 1] = offset(id) + n; + offset_[id + 1] = detail::to_size_type( + offset(id) + n); } // add n to [first, last] @@ -284,7 +304,7 @@ adjust_right( { for(int i = first; i <= last; ++i) - offset_[i] += n; + offset_[i] += detail::to_size_type(n); } // remove n from [first, last] @@ -297,7 +317,7 @@ adjust_left( { for(int i = first; i <= last; ++i) - offset_[i] -= n; + offset_[i] -= detail::to_size_type(n); } // set [first, last) offset @@ -310,7 +330,7 @@ collapse( { for(int i = first + 1; i < last; ++i) - offset_[i] = n; + offset_[i] = detail::to_size_type(n); } diff --git a/src/url_base.cpp b/src/url_base.cpp index 149bcb7c0..aea1a2213 100644 --- a/src/url_base.cpp +++ b/src/url_base.cpp @@ -371,14 +371,26 @@ set_userinfo( // find ':' in plain string auto const pos2 = s.find_first_of(':'); - impl_.decoded_[id_user] = - pos2 - 1; - impl_.decoded_[id_pass] = - s.size() - pos2; + if(pos2 != core::string_view::npos) + { + // pos2 is the ':' index in plain input (user[:pass]) + // decoded user is [0, pos2), decoded pass is (pos2, end]. + impl_.decoded_[id_user] = + detail::to_size_type(pos2); + impl_.decoded_[id_pass] = + detail::to_size_type(s.size() - pos2 - 1); + } + else + { + impl_.decoded_[id_user] = + detail::to_size_type(s.size()); + impl_.decoded_[id_pass] = 0; + } } else { - impl_.decoded_[id_user] = s.size(); + impl_.decoded_[id_user] = + detail::to_size_type(s.size()); impl_.decoded_[id_pass] = 0; } return *this; @@ -406,18 +418,18 @@ set_encoded_userinfo( auto dest = set_userinfo_impl(n0 + n1 + 1, op); impl_.decoded_[id_user] = - detail::re_encode_unsafe( + detail::to_size_type(detail::re_encode_unsafe( dest, dest + n0, s0, - detail::user_chars); + detail::user_chars)); *dest++ = ':'; impl_.decoded_[id_pass] = - detail::re_encode_unsafe( + detail::to_size_type(detail::re_encode_unsafe( dest, dest + n1, s1, - detail::password_chars); + detail::password_chars)); impl_.split(id_user, 2 + n0); } else @@ -428,11 +440,11 @@ set_encoded_userinfo( s, detail::user_chars); auto dest = set_userinfo_impl(n, op); impl_.decoded_[id_user] = - detail::re_encode_unsafe( + detail::to_size_type(detail::re_encode_unsafe( dest, dest + n, s, - detail::user_chars); + detail::user_chars)); impl_.split(id_user, 2 + n); impl_.decoded_[id_pass] = 0; } @@ -472,7 +484,8 @@ set_user(core::string_view s) s, detail::user_chars, opt); - impl_.decoded_[id_user] = s.size(); + impl_.decoded_[id_user] = + detail::to_size_type(s.size()); return *this; } @@ -487,11 +500,11 @@ set_encoded_user( s, detail::user_chars); auto dest = set_user_impl(n, op); impl_.decoded_[id_user] = - detail::re_encode_unsafe( + detail::to_size_type(detail::re_encode_unsafe( dest, dest + n, s, - detail::user_chars); + detail::user_chars)); BOOST_ASSERT( impl_.decoded_[id_user] == s.decoded_size()); @@ -515,7 +528,8 @@ set_password(core::string_view s) s, detail::password_chars, opt); - impl_.decoded_[id_pass] = s.size(); + impl_.decoded_[id_pass] = + detail::to_size_type(s.size()); return *this; } @@ -531,11 +545,11 @@ set_encoded_password( detail::password_chars); auto dest = set_password_impl(n, op); impl_.decoded_[id_pass] = - detail::re_encode_unsafe( + detail::to_size_type(detail::re_encode_unsafe( dest, dest + n, s, - detail::password_chars); + detail::password_chars)); BOOST_ASSERT( impl_.decoded_[id_pass] == s.decoded_size()); @@ -661,7 +675,8 @@ set_host( s, detail::host_chars, opt); - impl_.decoded_[id_host] = s.size(); + impl_.decoded_[id_host] = + detail::to_size_type(s.size()); impl_.host_type_ = urls::host_type::name; return *this; @@ -735,11 +750,11 @@ set_encoded_host( s, detail::host_chars); auto dest = set_host_impl(n, op); impl_.decoded_[id_host] = - detail::re_encode_unsafe( + detail::to_size_type(detail::re_encode_unsafe( dest, impl_.get(id_path).data(), s, - detail::host_chars); + detail::host_chars)); BOOST_ASSERT(impl_.decoded_[id_host] == s.decoded_size()); impl_.host_type_ = @@ -809,7 +824,8 @@ set_host_address( s, detail::host_chars, opt); - impl_.decoded_[id_host] = s.size(); + impl_.decoded_[id_host] = + detail::to_size_type(s.size()); impl_.host_type_ = urls::host_type::name; return *this; @@ -879,11 +895,11 @@ set_encoded_host_address( s, detail::host_chars); auto dest = set_host_impl(n, op); impl_.decoded_[id_host] = - detail::re_encode_unsafe( + detail::to_size_type(detail::re_encode_unsafe( dest, impl_.get(id_path).data(), s, - detail::host_chars); + detail::host_chars)); BOOST_ASSERT(impl_.decoded_[id_host] == s.decoded_size()); impl_.host_type_ = @@ -901,7 +917,8 @@ set_host_ipv4( auto s = addr.to_buffer(buf, sizeof(buf)); auto dest = set_host_impl(s.size(), op); std::memcpy(dest, s.data(), s.size()); - impl_.decoded_[id_host] = impl_.len(id_host); + impl_.decoded_[id_host] = + detail::to_size_type(impl_.len(id_host)); impl_.host_type_ = urls::host_type::ipv4; auto bytes = addr.to_bytes(); std::memcpy( @@ -964,7 +981,8 @@ set_host_ipv6_and_zone_id( } *dest++ = ']'; // ipn + |"["| + |"]"| + (has_zone_id ? |"%"| + zn : 0) - impl_.decoded_[id_host] = ipn + 2 + has_zone_id * (1 + zone_id.size()); + impl_.decoded_[id_host] = detail::to_size_type( + ipn + 2 + has_zone_id * (1 + zone_id.size())); impl_.host_type_ = urls::host_type::ipv6; auto bytes = addr.to_bytes(); std::memcpy( @@ -1000,7 +1018,8 @@ set_host_ipv6_and_encoded_zone_id( } *dest++ = ']'; // ipn + |"["| + |"]"| + (has_zone_id ? |"%"| + zn : 0) - impl_.decoded_[id_host] = ipn + 2 + has_zone_id * (1 + dzn); + impl_.decoded_[id_host] = detail::to_size_type( + ipn + 2 + has_zone_id * (1 + dzn)); impl_.host_type_ = urls::host_type::ipv6; auto bytes = addr.to_bytes(); std::memcpy( @@ -1026,7 +1045,8 @@ set_host_ipvfuture( *dest = ']'; impl_.host_type_ = urls::host_type::ipvfuture; - impl_.decoded_[id_host] = s.size() + 2; + impl_.decoded_[id_host] = + detail::to_size_type(s.size() + 2); return *this; } @@ -1059,7 +1079,8 @@ set_host_name( opt); impl_.host_type_ = urls::host_type::name; - impl_.decoded_[id_host] = s.size(); + impl_.decoded_[id_host] = + detail::to_size_type(s.size()); return *this; } @@ -1084,11 +1105,11 @@ set_encoded_host_name( s, allowed); auto dest = set_host_impl(n, op); impl_.decoded_[id_host] = - detail::re_encode_unsafe( + detail::to_size_type(detail::re_encode_unsafe( dest, dest + n, s, - allowed); + allowed)); BOOST_ASSERT( impl_.decoded_[id_host] == s.decoded_size()); @@ -1332,7 +1353,8 @@ set_path( s.substr(first_seg.size()), detail::path_chars, opt); - impl_.decoded_[id_path] += s.size(); + impl_.decoded_[id_path] += + detail::to_size_type(s.size()); BOOST_ASSERT(!dest || dest == impl_.get(id_query).data()); BOOST_ASSERT( impl_.decoded_[id_path] == @@ -1353,8 +1375,9 @@ set_path( if (s.starts_with("/./")) s = s.substr(2); // count segments as number of '/'s + 1 - impl_.nseg_ = std::count( - s.begin() + 1, s.end(), '/') + 1; + impl_.nseg_ = detail::to_size_type( + std::count( + s.begin() + 1, s.end(), '/') + 1); } else { @@ -1435,17 +1458,17 @@ set_encoded_path( impl_.decoded_[id_path] += 2; } impl_.decoded_[id_path] += - detail::re_encode_unsafe( + detail::to_size_type(detail::re_encode_unsafe( dest, impl_.get(id_query).data(), first_seg, - detail::segment_chars - ':'); + detail::segment_chars - ':')); impl_.decoded_[id_path] += - detail::re_encode_unsafe( + detail::to_size_type(detail::re_encode_unsafe( dest, impl_.get(id_query).data(), s.substr(first_seg.size()), - detail::path_chars); + detail::path_chars)); BOOST_ASSERT(dest == impl_.get(id_query).data()); BOOST_ASSERT( impl_.decoded_[id_path] == @@ -1466,8 +1489,9 @@ set_encoded_path( if (s.starts_with("/./")) s = s.substr(2); // count segments as number of '/'s + 1 - impl_.nseg_ = std::count( - s.begin() + 1, s.end(), '/') + 1; + impl_.nseg_ = detail::to_size_type( + std::count( + s.begin() + 1, s.end(), '/') + 1); } else { @@ -1554,15 +1578,16 @@ set_encoded_query( // encode impl_.decoded_[id_query] = - detail::re_encode_unsafe( + detail::to_size_type(detail::re_encode_unsafe( dest, dest + n, s, - detail::query_chars); + detail::query_chars)); BOOST_ASSERT( impl_.decoded_[id_query] == s.decoded_size()); - impl_.nparam_ = nparam; + impl_.nparam_ = + detail::to_size_type(nparam); return *this; } @@ -1654,7 +1679,8 @@ set_fragment(core::string_view s) s, detail::fragment_chars, opt); - impl_.decoded_[id_frag] = s.size(); + impl_.decoded_[id_frag] = + detail::to_size_type(s.size()); return *this; } @@ -1672,11 +1698,11 @@ set_encoded_fragment( id_frag, n + 1, op); *dest++ = '#'; impl_.decoded_[id_frag] = - detail::re_encode_unsafe( + detail::to_size_type(detail::re_encode_unsafe( dest, dest + n, s, - detail::fragment_chars); + detail::fragment_chars)); BOOST_ASSERT( impl_.decoded_[id_frag] == s.decoded_size()); @@ -2065,12 +2091,14 @@ normalize_path() if (p == "/") impl_.nseg_ = 0; else if (!p.empty()) - impl_.nseg_ = std::count( - p.begin() + 1, p.end(), '/') + 1; + impl_.nseg_ = detail::to_size_type( + std::count( + p.begin() + 1, p.end(), '/') + 1); else impl_.nseg_ = 0; impl_.decoded_[id_path] = - detail::decode_bytes_unsafe(impl_.get(id_path)); + detail::to_size_type(detail::decode_bytes_unsafe( + impl_.get(id_path))); } return *this; } @@ -2777,8 +2805,14 @@ edit_segments( impl_.len(id_path) + nchar - nremove); BOOST_ASSERT(size() == new_size); end = dest + nchar; - impl_.nseg_ = impl_.nseg_ + nseg - ( - it1.index - it0.index) - cp_src_prefix; + auto const nseg1 = + static_cast(impl_.nseg_) + + static_cast(nseg) - + static_cast(it1.index) + + static_cast(it0.index) - + static_cast(cp_src_prefix); + BOOST_ASSERT(nseg1 >= 0); + impl_.nseg_ = detail::to_size_type(nseg1); if(s_) s_[size()] = '\0'; } @@ -2827,7 +2861,12 @@ edit_segments( auto const dn = detail::decode_bytes_unsafe( core::string_view(dest0, dest - dest0)); - impl_.decoded_[id_path] += dn - dn0; + if(dn >= dn0) + impl_.decoded_[id_path] += + detail::to_size_type(dn - dn0); + else + impl_.decoded_[id_path] -= + detail::to_size_type(dn0 - dn); return detail::segments_iter_impl( impl_, pos0, it0.index); @@ -2862,12 +2901,16 @@ edit_params( // calc decoded size of old range, // minus one if '?' or '&' prefixed - auto const dn0 = - detail::decode_bytes_unsafe( - core::string_view( - impl_.cs_ + pos0, - pos1 - pos0)) - ( - impl_.len(id_query) > 0); + auto dn0 = + static_cast( + detail::decode_bytes_unsafe( + core::string_view( + impl_.cs_ + pos0, + pos1 - pos0))); + if(impl_.len(id_query) > 0) + dn0 -= 1; + if(dn0 < 0) + dn0 = 0; //------------------------------------------------ // @@ -2908,8 +2951,11 @@ edit_params( detail::throw_length_error(); } auto const nparam1 = - impl_.nparam_ + nparam - ( - it1.index - it0.index); + static_cast(impl_.nparam_) + + static_cast(nparam) - + static_cast(it1.index) + + static_cast(it0.index); + BOOST_ASSERT(nparam1 >= 0); reserve_impl(size() + nchar - nremove, op); dest = s_ + pos0; end = dest + nchar; @@ -2927,7 +2973,8 @@ edit_params( id_query, impl_.len(id_query) + nchar - nremove); - impl_.nparam_ = nparam1; + impl_.nparam_ = + detail::to_size_type(nparam1); if(nparam1 > 0) { // needed when we erase @@ -2963,12 +3010,21 @@ edit_params( // calc decoded size of new range, // minus one if '?' or '&' prefixed - auto const dn = - detail::decode_bytes_unsafe( - core::string_view(dest0, dest - dest0)) - ( - impl_.len(id_query) > 0); - - impl_.decoded_[id_query] += (dn - dn0); + auto dn = + static_cast( + detail::decode_bytes_unsafe( + core::string_view(dest0, dest - dest0))); + if(impl_.len(id_query) > 0) + dn -= 1; + if(dn < 0) + dn = 0; + + if(dn >= dn0) + impl_.decoded_[id_query] += + detail::to_size_type(dn - dn0); + else + impl_.decoded_[id_query] -= + detail::to_size_type(dn0 - dn); return detail::params_iter_impl( impl_,