diff --git a/benches/bench.rs b/benches/bench.rs index b6a8b85..88ccb4f 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -310,3 +310,14 @@ fn bench_macro_from_list_vec(b: &mut Bencher) { vec }); } + +// Regression bench for https://github.com/rust-lang/rust/issues/157743: +// rustc 1.93+ emits a memset over the entire struct instead of a single +// store to the capacity field when a SmallVec is wrapped in a newtype. +// Uses a capacity large enough that the memset becomes a function call +// rather than inline SSE stores, making the regression clearly measurable. +#[bench] +fn bench_new_in_newtype(b: &mut Bencher) { + struct Wrap(SmallVec<[u64; 32]>); + b.iter(|| test::black_box(Wrap(SmallVec::new()))) +} diff --git a/src/lib.rs b/src/lib.rs index aff8401..2f508ff 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -651,6 +651,19 @@ impl<A: Array> SmallVecData<A> { inline: core::mem::ManuallyDrop::new(inline), } } + // Workaround for https://github.com/rust-lang/rust/issues/157743: when from_inline is + // called with MaybeUninit::uninit(), rustc 1.93+ GVN propagates const <uninit> into the + // ManuallyDrop::new() aggregate, causing LLVM to materialize a global constant that + // MemCpyOpt then collapses into a memset over the whole struct. Using assume_init() of a + // doubly-wrapped MaybeUninit produces Immediate::Uninit instead of const <uninit>, which + // codegen handles as undef without emitting any global. This function also avoids + // introducing an intermediate local that would inflate stack frames in debug builds. + #[inline] + fn empty() -> SmallVecData<A> { + // SAFETY: ManuallyDrop<MaybeUninit<A>> is valid for any bit pattern including + // uninitialized bytes, so assume_init() on a MaybeUninit of that type is sound.
+ SmallVecData { inline: unsafe { MaybeUninit::uninit().assume_init() } } + } #[inline] unsafe fn into_inline(self) -> MaybeUninit<A> { core::mem::ManuallyDrop::into_inner(self.inline) @@ -713,6 +726,13 @@ impl<A: Array> SmallVecData<A> { fn from_inline(inline: MaybeUninit<A>) -> SmallVecData<A> { SmallVecData::Inline(inline) } + // See the comment on the union variant's empty() for why this exists. + #[inline] + fn empty() -> SmallVecData<A> { + // SAFETY: MaybeUninit<A> is valid for any bit pattern including uninitialized bytes, + // so assume_init() on a MaybeUninit of that type is sound. + SmallVecData::Inline(unsafe { MaybeUninit::uninit().assume_init() }) + } #[inline] unsafe fn into_inline(self) -> MaybeUninit<A> { match self { @@ -789,7 +809,7 @@ impl<A: Array> SmallVec<A> { ); SmallVec { capacity: 0, - data: SmallVecData::from_inline(MaybeUninit::uninit()), + data: SmallVecData::empty(), } } @@ -831,7 +851,7 @@ impl<A: Array> SmallVec<A> { // Cannot use Vec with smaller capacity // because we use value of `Self::capacity` field as indicator. unsafe { - let mut data = SmallVecData::<A>::from_inline(MaybeUninit::uninit()); + let mut data = SmallVecData::<A>::empty(); let len = vec.len(); vec.set_len(0); ptr::copy_nonoverlapping(vec.as_ptr(), data.inline_mut().as_ptr(), len); @@ -1183,7 +1203,7 @@ impl<A: Array> SmallVec<A> { if unspilled { return Ok(()); } - self.data = SmallVecData::from_inline(MaybeUninit::uninit()); + self.data = SmallVecData::empty(); ptr::copy_nonoverlapping(ptr.as_ptr(), self.data.inline_mut().as_ptr(), len); self.capacity = len; deallocate(ptr, cap); @@ -1283,7 +1303,7 @@ impl<A: Array> SmallVec<A> { if self.inline_size() >= len { unsafe { let (ptr, len) = self.data.heap(); - self.data = SmallVecData::from_inline(MaybeUninit::uninit()); + self.data = SmallVecData::empty(); ptr::copy_nonoverlapping(ptr.as_ptr(), self.data.inline_mut().as_ptr(), len); deallocate(ptr.0, self.capacity); self.capacity = len;