tokio-rs · mzabaluev · Mar 31, 2024 · Nov 21, 2024 · Nov 24, 2024 · Nov 25, 2024
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -152,6 +152,37 @@ jobs:
       - name: test no-default-features
         run: cargo test -p prost-build -p prost-derive -p prost-types --no-default-features
 
+  tests-overflow:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@stable
+        with:
+          targets: i686-unknown-linux-gnu
+      - name: install multilib support for GCC
+        run: sudo apt-get install -y gcc-multilib
+      - name: install protoc
+        uses: taiki-e/install-action@v2
+        with:
+          tool: protoc@${{ env.PROTOC_VERSION }}
+      - uses: Swatinem/rust-cache@v2
+      - name: test for integer overflows
+        run: |
+          cargo test -p tests-overflow --target i686-unknown-linux-gnu -- \
+            --skip encoded_len::overflow::field::int32::encoded_len_packed_can_overflow_u32 \
+            --skip encoded_len::overflow::field::int32::encoded_len_repeated_can_overflow_u32 \
+            --include-ignored --test-threads 1
+      - name: run tests with large allocations in isolation
+        run: |
+          for test in \
+            encoded_len::overflow::field::int32::encoded_len_packed_can_overflow_u32 \
+            encoded_len::overflow::field::int32::encoded_len_repeated_can_overflow_u32 \
+            ;
+          do
+            cargo test -p tests-overflow --target i686-unknown-linux-gnu -- \
+              --ignored --exact $test
+          done
+
   msrv:
     runs-on: ubuntu-latest
     steps:

diff --git a/Cargo.toml b/Cargo.toml
@@ -10,6 +10,7 @@ members = [
   "tests",
   "tests-2015",
   "tests-no-std",
+  "tests/overflow",
   "tests/single-include",
   "fuzz",
 ]
@@ -31,3 +32,14 @@ edition = "2021"
 
 [profile.bench]
 debug = true
+
+[profile.test.package.tests-overflow]
+# Optimize tests to not spend stupid amounts of time looping through repetitive
+# encoded fields used for length padding.
+opt-level = 1
+# Code under test is expected to explicitly check for overflows and panic,
+# so disable built-in checks.
+overflow-checks = false
+
+[profile.test.package.prost]
+overflow-checks = false
diff --git a/prost-derive/src/field/mod.rs b/prost-derive/src/field/mod.rs
@@ -13,6 +13,10 @@ use quote::quote;
 use syn::punctuated::Punctuated;
 use syn::{Attribute, Expr, ExprLit, Lit, LitBool, LitInt, Meta, MetaNameValue, Token};
 
+// Maximum possible length of an encoded field key.
+// Verified by the max_key_len test in prost.
+const MAX_KEY_LEN: usize = 5;
+
 #[derive(Clone)]
 pub enum Field {
     /// A scalar field.
@@ -122,6 +126,24 @@ impl Field {
         }
     }
 
+    /// Returns the upper bound on the encoded length of the field
+    /// when it is known statically.
+    pub fn encoded_len_limit(&self) -> Option<usize> {
+        match self {
+            Field::Scalar(scalar) => scalar.encoded_len_limit(),
+            Field::Map(_) => None,
+            Field::Message(_) | Field::Oneof(_) | Field::Group(_) => {
+                // A limit could be determined when the field is singular,
+                // all its constituent fields are also singular and have a known
+                // limit on the encoded length.
+                // But there is no way to know this in the derive macro,
+                // and we should not trust the invoker to set it correctly with
+                // e.g. a field attribute.
+                None
+            }
+        }
+    }
+
     /// Returns a statement which clears the field.
     pub fn clear(&self, ident: TokenStream) -> TokenStream {
         match *self {

diff --git a/prost-derive/src/field/scalar.rs b/prost-derive/src/field/scalar.rs
@@ -5,7 +5,7 @@ use proc_macro2::{Span, TokenStream};
 use quote::{quote, ToTokens, TokenStreamExt};
 use syn::{parse_str, Expr, ExprLit, Ident, Index, Lit, LitByteStr, Meta, MetaNameValue, Path};
 
-use crate::field::{bool_attr, set_option, tag_attr, Label};
+use crate::field::{bool_attr, set_option, tag_attr, Label, MAX_KEY_LEN};
 
 /// A scalar protobuf field.
 #[derive(Clone)]
@@ -189,6 +189,15 @@ impl Field {
         }
     }
 
+    pub fn encoded_len_limit(&self) -> Option<usize> {
+        match &self.kind {
+            Kind::Plain(_) | Kind::Optional(_) | Kind::Required(_) => {
+                self.ty.encoded_len_limit().map(|limit| MAX_KEY_LEN + limit)
+            }
+            Kind::Repeated | Kind::Packed => None,
+        }
+    }
+
     pub fn clear(&self, ident: TokenStream) -> TokenStream {
         match self.kind {
             Kind::Plain(ref default) | Kind::Required(ref default) => {
@@ -570,6 +579,23 @@ impl Ty {
     pub fn is_numeric(&self) -> bool {
         !matches!(self, Ty::String | Ty::Bytes(..))
     }
+
+    pub fn encoded_len_limit(&self) -> Option<usize> {
+        match self {
+            Ty::Bool => Some(1),
+            // varint64
+            Ty::Int32 | Ty::Int64 | Ty::Uint64 | Ty::Sint64 | Ty::Enumeration(..) => Some(10),
+            // varint32
+            Ty::Uint32 | Ty::Sint32 => Some(5),
+            // fixed32
+            Ty::Fixed32 | Ty::Sfixed32 | Ty::Float => Some(4),
+            // fixed64
+            Ty::Fixed64 | Ty::Sfixed64 | Ty::Double => Some(8),
+            // length delimited
+            Ty::String => None,
+            Ty::Bytes(..) => None,
+        }
+    }
 }
 
 impl fmt::Debug for Ty {

diff --git a/prost-derive/src/lib.rs b/prost-derive/src/lib.rs
@@ -6,7 +6,7 @@ extern crate alloc;
 extern crate proc_macro;
 
 use anyhow::{bail, Error};
-use itertools::Itertools;
+use itertools::{Either, Itertools};
 use proc_macro2::{Span, TokenStream};
 use quote::quote;
 use syn::{
@@ -103,9 +103,25 @@ fn try_message(input: TokenStream) -> Result<TokenStream, Error> {
         )
     };
 
-    let encoded_len = fields
+    // For encoded_len, split the fields into those that have a known length limit
+    // and those that don't. The sum of the known lengths should not overflow usize.
+    // For purposes of testing, we want both lists to be in declaration order.
+    let mut total_limit = 0usize;
+    let (encoded_len_limited, encoded_len_unlimited): (Vec<_>, Vec<_>) = unsorted_fields
         .iter()
-        .map(|(field_ident, field)| field.encoded_len(quote!(self.#field_ident)));
+        .partition_map(move |(field_ident, field)| {
+            let encoded_len_expr = field.encoded_len(quote!(self.#field_ident));
+            match field
+                .encoded_len_limit()
+                .and_then(|limit| total_limit.checked_add(limit))
+            {
+                Some(sum) => {
+                    total_limit = sum;
+                    Either::Left(encoded_len_expr)
+                }
+                None => Either::Right(encoded_len_expr),
+            }
+        });
 
     let encode = fields
         .iter()
@@ -196,8 +212,12 @@ fn try_message(input: TokenStream) -> Result<TokenStream, Error> {
             }
 
             #[inline]
+            #[allow(unused_mut)]
             fn encoded_len(&self) -> usize {
-                0 #(+ #encoded_len)*
+                let mut acc = 0usize #(+ #encoded_len_limited)*;
+                #(acc = acc.checked_add(#encoded_len_unlimited)
+                    .expect("encoded length overflows usize");)*
+                acc
             }
 
             fn clear(&mut self) {