From 58d8e8e9396aa44019dd0ec9d259f6536da66c68 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 12 Oct 2023 08:25:22 +0200 Subject: [PATCH 1/4] add some comments and some cleanup around Miri intptrcast --- src/intptrcast.rs | 37 ++++++++++++++++++------------------- src/machine.rs | 18 ++++++++++++------ 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/src/intptrcast.rs b/src/intptrcast.rs index 4fd0af3530..154d86375c 100644 --- a/src/intptrcast.rs +++ b/src/intptrcast.rs @@ -119,24 +119,14 @@ impl<'mir, 'tcx> GlobalStateInner { Ok(()) } - pub fn ptr_from_addr_transmute( - _ecx: &MiriInterpCx<'mir, 'tcx>, - addr: u64, - ) -> Pointer> { - trace!("Transmuting {:#x} to a pointer", addr); - - // We consider transmuted pointers to be "invalid" (`None` provenance). - Pointer::new(None, Size::from_bytes(addr)) - } - pub fn ptr_from_addr_cast( ecx: &MiriInterpCx<'mir, 'tcx>, addr: u64, ) -> InterpResult<'tcx, Pointer>> { trace!("Casting {:#x} to a pointer", addr); + // Potentially emit a warning. let global_state = ecx.machine.intptrcast.borrow(); - match global_state.provenance_mode { ProvenanceMode::Default => { // The first time this happens at a particular location, print a warning. @@ -158,7 +148,12 @@ impl<'mir, 'tcx> GlobalStateInner { ProvenanceMode::Permissive => {} } - // This is how wildcard pointers are born. + // We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is + // completely legal to do a cast and then `wrapping_offset` to another allocation and only + // *then* do a memory access. So the allocation that the pointer happens to point to on a + // cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that + // *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed) + // allocation it might be referencing. Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr))) } @@ -219,22 +214,27 @@ impl<'mir, 'tcx> GlobalStateInner { }) } - /// Convert a relative (tcx) pointer to an absolute address. - pub fn rel_ptr_to_addr( + /// Convert a relative (tcx) pointer to a Miri pointer. + pub fn ptr_from_rel_ptr( ecx: &MiriInterpCx<'mir, 'tcx>, ptr: Pointer, - ) -> InterpResult<'tcx, u64> { + tag: BorTag, + ) -> InterpResult<'tcx, Pointer> { let (alloc_id, offset) = ptr.into_parts(); // offset is relative (AllocId provenance) let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id)?; // Add offset with the right kind of pointer-overflowing arithmetic. let dl = ecx.data_layout(); - Ok(dl.overflowing_offset(base_addr, offset.bytes()).0) + let absolute_addr = dl.overflowing_offset(base_addr, offset.bytes()).0; + Ok(Pointer::new( + Provenance::Concrete { alloc_id, tag }, + Size::from_bytes(absolute_addr), + )) } /// When a pointer is used for a memory access, this computes where in which allocation the /// access is going. - pub fn abs_ptr_to_rel( + pub fn ptr_get_alloc( ecx: &MiriInterpCx<'mir, 'tcx>, ptr: Pointer, ) -> Option<(AllocId, Size)> { @@ -252,12 +252,11 @@ impl<'mir, 'tcx> GlobalStateInner { let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id).unwrap(); // Wrapping "addr - base_addr" - let dl = ecx.data_layout(); #[allow(clippy::cast_possible_wrap)] // we want to wrap here let neg_base_addr = (base_addr as i64).wrapping_neg(); Some(( alloc_id, - Size::from_bytes(dl.overflowing_signed_offset(addr.bytes(), neg_base_addr).0), + Size::from_bytes(ecx.overflowing_signed_offset(addr.bytes(), neg_base_addr).0), )) } diff --git a/src/machine.rs b/src/machine.rs index 3de2746086..96c734c550 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -1136,19 +1136,16 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> { _ => {} } } - let absolute_addr = intptrcast::GlobalStateInner::rel_ptr_to_addr(ecx, ptr)?; let tag = if let Some(borrow_tracker) = &ecx.machine.borrow_tracker { borrow_tracker.borrow_mut().base_ptr_tag(ptr.provenance, &ecx.machine) } else { // Value does not matter, SB is disabled BorTag::default() }; - Ok(Pointer::new( - Provenance::Concrete { alloc_id: ptr.provenance, tag }, - Size::from_bytes(absolute_addr), - )) + intptrcast::GlobalStateInner::ptr_from_rel_ptr(ecx, ptr, tag) } + /// Called on `usize as ptr` casts. #[inline(always)] fn ptr_from_addr_cast( ecx: &MiriInterpCx<'mir, 'tcx>, @@ -1157,6 +1154,9 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> { intptrcast::GlobalStateInner::ptr_from_addr_cast(ecx, addr) } + /// Called on `ptr as usize` casts. + /// (Actually computing the resulting `usize` doesn't need machine help, + /// that's just `Scalar::try_to_int`.) fn expose_ptr( ecx: &mut InterpCx<'mir, 'tcx, Self>, ptr: Pointer, @@ -1174,11 +1174,17 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> { /// Convert a pointer with provenance into an allocation-offset pair, /// or a `None` with an absolute address if that conversion is not possible. + /// + /// This is called when a pointer is about to be used for memory access, + /// an in-bounds check, or anything else that requires knowing which allocation it points to. + /// The resulting `AllocId` will just be used for that one step and the forgotten again + /// (i.e., we'll never turn the data returned here back into a `Pointer` that might be + /// stored in machine state). fn ptr_get_alloc( ecx: &MiriInterpCx<'mir, 'tcx>, ptr: Pointer, ) -> Option<(AllocId, Size, Self::ProvenanceExtra)> { - let rel = intptrcast::GlobalStateInner::abs_ptr_to_rel(ecx, ptr); + let rel = intptrcast::GlobalStateInner::ptr_get_alloc(ecx, ptr); rel.map(|(alloc_id, size)| { let tag = match ptr.provenance { From 7257db8faba0b82de7eef8135f28afcb8cc7362f Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 13 Oct 2023 07:41:10 +0200 Subject: [PATCH 2/4] doc comment for Provenance::Wildcard --- src/machine.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/machine.rs b/src/machine.rs index 96c734c550..f956015c80 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -169,11 +169,29 @@ impl fmt::Display for MiriMemoryKind { /// Pointer provenance. #[derive(Clone, Copy)] pub enum Provenance { + /// For pointers with concrete provenance. we exactly know which allocation they are attached to + /// and what their borrow tag is. Concrete { alloc_id: AllocId, /// Borrow Tracker tag. tag: BorTag, }, + /// Pointers with wildcard provenance are created on int-to-ptr casts. According to the + /// specification, we should at that point angelically "guess" a provenance that will make all + /// future uses of this pointer work, if at all possible. Of course such a semantics cannot be + /// actually implemented in Miri. So instead, we approximate this, erroring on the side of + /// accepting too much code rather than rejecting correct code: a pointer with wildcard + /// provenance "acts like" any previously exposed pointer. Each time it is used, we check + /// whether *some* exposed pointer could have done what we want to do, and if the answer is yes + /// then we allow the access. This allows too much code in two ways: + /// - The same wildcard pointer can "take the role" of multiple different exposed pointers on + /// subsequenct memory accesses. + /// - In the aliasing model, we don't just have to know the borrow tag of the pointer used for + /// the access, we also have to update the aliasing state -- and that update can be very + /// different depending on which borrow tag we pick! Stacked Borrows has support for this by + /// switching to a stack that is only approximately known, i.e. we overapproximate the effect + /// of using *any* exposed pointer for this access, and only keep information about the borrow + /// stack that would be true with all possible choices. Wildcard, } From e287ef66bbb03d8609ae14382337345498598574 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 19 Oct 2023 17:57:28 +0200 Subject: [PATCH 3/4] Preparing for merge from rustc --- rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-version b/rust-version index a7c9d720c0..4664cc3982 100644 --- a/rust-version +++ b/rust-version @@ -1 +1 @@ -c07693c1608258f3577eb15057fc0744fa924ae9 +c104861b7b51d2c28e7023e7e53db16cc6677e29 From 77bf0b5d331538012e7a549a5f098a53c71d7e21 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 19 Oct 2023 21:26:38 +0200 Subject: [PATCH 4/4] fmt --- src/intptrcast.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/intptrcast.rs b/src/intptrcast.rs index 154d86375c..0bdea15763 100644 --- a/src/intptrcast.rs +++ b/src/intptrcast.rs @@ -226,10 +226,7 @@ impl<'mir, 'tcx> GlobalStateInner { // Add offset with the right kind of pointer-overflowing arithmetic. let dl = ecx.data_layout(); let absolute_addr = dl.overflowing_offset(base_addr, offset.bytes()).0; - Ok(Pointer::new( - Provenance::Concrete { alloc_id, tag }, - Size::from_bytes(absolute_addr), - )) + Ok(Pointer::new(Provenance::Concrete { alloc_id, tag }, Size::from_bytes(absolute_addr))) } /// When a pointer is used for a memory access, this computes where in which allocation the