Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Row Data Stored On The Heap #1009

Merged
1 commit merged on Feb 25, 2025
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
4 changes: 2 additions & 2 deletions crates/air_utils/src/lookup_data/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ mod tests {
.iter_mut()
.zip(arr.chunks(N_LANES))
.zip(lookup_data.iter_mut())
.for_each(|((row, input), lookup_data)| {
.for_each(|((mut row, input), lookup_data)| {
*row[0] = PackedM31::from_array(input.try_into().unwrap());
*row[1] = *row[0] + PackedM31::broadcast(M31(1));
*row[2] = *row[0] + *row[1];
Expand Down Expand Up @@ -132,7 +132,7 @@ mod tests {
.par_iter_mut()
.zip(arr.par_chunks(N_LANES).into_par_iter())
.zip(lookup_data.par_iter_mut())
.for_each(|((row, input), lookup_data)| {
.for_each(|((mut row, input), lookup_data)| {
*row[0] = PackedM31::from_array(input.try_into().unwrap());
*row[1] = *row[0] + PackedM31::broadcast(M31(1));
*row[2] = *row[0] + *row[1];
Expand Down
23 changes: 17 additions & 6 deletions crates/air_utils/src/trace/component_trace.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use bytemuck::Zeroable;
use itertools::Itertools;
use stwo_prover::core::backend::simd::column::BaseColumn;
use stwo_prover::core::backend::simd::m31::{PackedM31, LOG_N_LANES, N_LANES};
use stwo_prover::core::backend::simd::SimdBackend;
Expand Down Expand Up @@ -37,7 +38,7 @@ use super::row_iterator::{ParRowIterMut, RowIterMut};
/// .chunks(4)
/// .into_iter()
/// .for_each(|chunk| {
/// chunk.into_iter().for_each(|(row, input)| {
/// chunk.into_iter().for_each(|(mut row, input)| {
/// *row[0] = PackedM31::from_array(input.try_into().unwrap());
/// *row[1] = *row[0] + PackedM31::broadcast(M31(1));
/// *row[2] = row[0].square() + row[1].square();
Expand Down Expand Up @@ -102,12 +103,22 @@ impl<const N: usize> ComponentTrace<N> {
self.log_size
}

pub fn iter_mut(&mut self) -> RowIterMut<'_, N> {
RowIterMut::new(self.data.each_mut().map(|column| column.as_mut_slice()))
pub fn iter_mut(&mut self) -> RowIterMut<'_> {
RowIterMut::new(
self.data
.iter_mut()
.map(|col| col.as_mut_slice())
.collect_vec(),
)
}

pub fn par_iter_mut(&mut self) -> ParRowIterMut<'_, N> {
ParRowIterMut::new(self.data.each_mut().map(|column| column.as_mut_slice()))
pub fn par_iter_mut(&mut self) -> ParRowIterMut<'_> {
ParRowIterMut::new(
self.data
.iter_mut()
.map(|col| col.as_mut_slice())
.collect_vec(),
)
}

pub fn to_evals(self) -> [CircleEvaluation<SimdBackend, M31, BitReversedOrder>; N] {
Expand Down Expand Up @@ -161,7 +172,7 @@ mod tests {
.zip(arr.par_chunks(N_LANES))
.chunks(CHUNK_SIZE)
.for_each(|chunk| {
chunk.into_iter().for_each(|(row, input)| {
chunk.into_iter().for_each(|(mut row, input)| {
*row[0] = PackedM31::from_array(input.try_into().unwrap());
*row[1] = *row[0] + PackedM31::broadcast(M31(1));
*row[2] = row[0].square() + row[1].square();
Expand Down
92 changes: 50 additions & 42 deletions crates/air_utils/src/trace/row_iterator.rs
Original file line number Diff line number Diff line change
@@ -1,39 +1,44 @@
use std::marker::PhantomData;

use itertools::Itertools;
use rayon::iter::plumbing::{bridge, Consumer, Producer, ProducerCallback, UnindexedConsumer};
use rayon::prelude::*;
use stwo_prover::core::backend::simd::m31::PackedM31;

pub type MutRow<'trace, const N: usize> = [&'trace mut PackedM31; N];
pub type MutRow<'trace> = Vec<&'trace mut PackedM31>;

/// An iterator over mutable references to the rows of a [`super::component_trace::ComponentTrace`].
// TODO(Ohad): Iterating over single rows is not optimal, figure out optimal chunk size when using
// this iterator.
pub struct RowIterMut<'trace, const N: usize> {
v: [*mut [PackedM31]; N],
pub struct RowIterMut<'trace> {
v: Vec<*mut [PackedM31]>,
phantom: PhantomData<&'trace ()>,
}
impl<'trace, const N: usize> RowIterMut<'trace, N> {
pub fn new(slice: [&'trace mut [PackedM31]; N]) -> Self {
impl<'trace> RowIterMut<'trace> {
pub fn new(slice: Vec<&'trace mut [PackedM31]>) -> Self {
Self {
v: slice.map(|s| s as *mut _),
v: slice.into_iter().map(|s| s as *mut _).collect_vec(),
phantom: PhantomData,
}
}
}
impl<'trace, const N: usize> Iterator for RowIterMut<'trace, N> {
type Item = MutRow<'trace, N>;
impl<'trace> Iterator for RowIterMut<'trace> {
type Item = MutRow<'trace>;

fn next(&mut self) -> Option<Self::Item> {
if self.v[0].is_empty() {
return None;
}
let item = std::array::from_fn(|i| unsafe {
// SAFETY: The self.v contract ensures that any split_at_mut is valid.
let (head, tail) = self.v[i].split_at_mut(1);
self.v[i] = tail;
&mut (*head)[0]
});
let item: Vec<&mut PackedM31> = self
.v
.iter_mut()
.map(|col_chunk| unsafe {
// SAFETY: The self.v contract ensures that any split_at_mut is valid.
let (head, tail) = col_chunk.split_at_mut(1);
*col_chunk = tail;
&mut (*head)[0]
})
.collect_vec();
Some(item)
}

Expand All @@ -42,44 +47,47 @@ impl<'trace, const N: usize> Iterator for RowIterMut<'trace, N> {
(len, Some(len))
}
}
impl<const N: usize> ExactSizeIterator for RowIterMut<'_, N> {}
impl<const N: usize> DoubleEndedIterator for RowIterMut<'_, N> {
impl ExactSizeIterator for RowIterMut<'_> {}
impl DoubleEndedIterator for RowIterMut<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
if self.v[0].is_empty() {
return None;
}
let item = std::array::from_fn(|i| unsafe {
// SAFETY: The self.v contract ensures that any split_at_mut is valid.
let (head, tail) = self.v[i].split_at_mut(self.v[i].len() - 1);
self.v[i] = head;
&mut (*tail)[0]
});
let item: Vec<&mut PackedM31> = self
.v
.iter_mut()
.map(|col_chunk| unsafe {
// SAFETY: The self.v contract ensures that any split_at_mut is valid.
let (head, tail) = col_chunk.split_at_mut(col_chunk.len() - 1);
*col_chunk = head;
&mut (*tail)[0]
})
.collect_vec();
Some(item)
}
}

struct RowProducer<'trace, const N: usize> {
data: [&'trace mut [PackedM31]; N],
struct RowProducer<'trace> {
data: Vec<&'trace mut [PackedM31]>,
}
impl<'trace, const N: usize> Producer for RowProducer<'trace, N> {
type Item = MutRow<'trace, N>;
impl<'trace> Producer for RowProducer<'trace> {
type Item = MutRow<'trace>;

fn split_at(self, index: usize) -> (Self, Self) {
let mut left: [_; N] = unsafe { std::mem::zeroed() };
let mut right: [_; N] = unsafe { std::mem::zeroed() };
for (i, slice) in self.data.into_iter().enumerate() {
let (lhs, rhs) = slice.split_at_mut(index);
left[i] = lhs;
right[i] = rhs;
}
let (left, right): (Vec<_>, Vec<_>) = self
.data
.into_iter()
.map(|slice| slice.split_at_mut(index))
.unzip();

(RowProducer { data: left }, RowProducer { data: right })
}

type IntoIter = RowIterMut<'trace, N>;
type IntoIter = RowIterMut<'trace>;

fn into_iter(self) -> Self::IntoIter {
RowIterMut {
v: self.data.map(|s| s as *mut _),
v: self.data.into_iter().map(|s| s as *mut _).collect_vec(),
phantom: PhantomData,
}
}
Expand All @@ -89,16 +97,16 @@ impl<'trace, const N: usize> Producer for RowProducer<'trace, N> {
/// [`super::component_trace::ComponentTrace`]. [`super::component_trace::ComponentTrace`] is an
/// array of columns, hence iterating over rows is not trivial. Iteration is done by iterating over
/// `N` columns in parallel.
pub struct ParRowIterMut<'trace, const N: usize> {
data: [&'trace mut [PackedM31]; N],
pub struct ParRowIterMut<'trace> {
data: Vec<&'trace mut [PackedM31]>,
}
impl<'trace, const N: usize> ParRowIterMut<'trace, N> {
pub(super) fn new(data: [&'trace mut [PackedM31]; N]) -> Self {
impl<'trace> ParRowIterMut<'trace> {
pub(super) fn new(data: Vec<&'trace mut [PackedM31]>) -> Self {
Self { data }
}
}
impl<'trace, const N: usize> ParallelIterator for ParRowIterMut<'trace, N> {
type Item = MutRow<'trace, N>;
impl<'trace> ParallelIterator for ParRowIterMut<'trace> {
type Item = MutRow<'trace>;

fn drive_unindexed<D>(self, consumer: D) -> D::Result
where
Expand All @@ -111,7 +119,7 @@ impl<'trace, const N: usize> ParallelIterator for ParRowIterMut<'trace, N> {
Some(self.len())
}
}
impl<const N: usize> IndexedParallelIterator for ParRowIterMut<'_, N> {
impl IndexedParallelIterator for ParRowIterMut<'_> {
fn len(&self) -> usize {
self.data[0].len()
}
Expand Down