When implementing an operator by delegating to its `Assign` version, two formulations that should be equivalent produce different assembly. Using `mut self` generates a redundant `memcpy` compared to using `let mut ret = self`.
Produces suboptimal assembly (Godbolt):
use std::ops::{BitAnd, BitAndAssign};
/// Minimal operand type for this reproduction: a tuple struct wrapping a one-element `u128` array.
struct Foo([u128; 1]);
/// In-place bitwise AND (`&=`) against a borrowed right-hand side.
impl BitAndAssign<&Self> for Foo {
// Export under the unmangled name `bitand_assign` so the symbol is easy to find in the emitted IR and assembly.
#[unsafe(no_mangle)]
fn bitand_assign(&mut self, other: &Self) {
// AND the single array element of `other` into the single array element of `self`.
self.0[0] &= other.0[0];
}
}
/// By-value bitwise AND (`&`) implemented by delegating to the `&=` operator.
///
/// This is the `mut self` formulation: the receiver is bound mutably, updated in place,
/// and then returned. It is the variant the report shows producing an extra `memcpy`
/// from `%self` into the return slot.
impl BitAnd<&Self> for Foo {
type Output = Foo;
// Export under the unmangled name `bitand` so the symbol is easy to find in the emitted IR and assembly.
#[unsafe(no_mangle)]
fn bitand(mut self, other: &Self) -> Self::Output {
// Mutate the by-value receiver directly via `BitAndAssign<&Self>`.
self &= other;
// Return the (now updated) receiver.
self
}
}
Generated LLVM IR
define void @bitand(ptr dead_on_unwind noalias nofree noundef writable writeonly sret([16 x i8]) align 16 captures(none) dereferenceable(16) initializes((0, 16)) %_0, ptr dead_on_return noalias nofree noundef align 16 captures(none) dereferenceable(16) %self, ptr noalias nofree noundef readonly align 16 captures(none) dereferenceable(16) %other) unnamed_addr {
start:
tail call void @llvm.experimental.noalias.scope.decl(metadata !29)
tail call void @llvm.experimental.noalias.scope.decl(metadata !33)
%_3.i = load i128, ptr %other, align 16
%0 = load i128, ptr %self, align 16
%1 = and i128 %0, %_3.i
store i128 %1, ptr %self, align 16
tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(16) %_0, ptr noundef nonnull align 16 dereferenceable(16) %self, i64 16, i1 false)
ret void
}
define void @bitand_assign(ptr noalias nofree noundef align 16 captures(none) dereferenceable(16) %self, ptr noalias nofree noundef readonly align 16 captures(none) dereferenceable(16) %other) unnamed_addr {
start:
%_3 = load i128, ptr %other, align 16
%0 = load i128, ptr %self, align 16
%1 = and i128 %0, %_3
store i128 %1, ptr %self, align 16
ret void
}
declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #2
declare void @llvm.experimental.noalias.scope.decl(metadata) #3
Generated amd64 assembly:
bitand:
mov rax, rdi
movaps xmm0, xmmword ptr [rsi]
andps xmm0, xmmword ptr [rdx]
movaps xmmword ptr [rsi], xmm0 ; store to self
movaps xmm0, xmmword ptr [rsi] ; load from self
movaps xmmword ptr [rdi], xmm0 ; store to output (should have been direct)
ret
bitand_assign:
movaps xmm0, xmmword ptr [rdi]
andps xmm0, xmmword ptr [rsi]
movaps xmmword ptr [rdi], xmm0
ret
Produces optimal assembly (Godbolt):
use std::ops::{BitAnd, BitAndAssign};
/// Minimal operand type for this reproduction: a tuple struct wrapping a one-element `u128` array.
struct Foo([u128; 1]);
/// In-place bitwise AND (`&=`) against a borrowed right-hand side.
impl BitAndAssign<&Self> for Foo {
// Export under the unmangled name `bitand_assign` so the symbol is easy to find in the emitted IR and assembly.
#[unsafe(no_mangle)]
fn bitand_assign(&mut self, other: &Self) {
// AND the single array element of `other` into the single array element of `self`.
self.0[0] &= other.0[0];
}
}
/// By-value bitwise AND (`&`) implemented by delegating to the `&=` operator.
///
/// This is the `let mut ret = self` formulation: the receiver is first moved into a
/// fresh mutable local, which is updated and returned. It is the variant the report
/// shows storing the result directly into the return slot with no extra copy.
impl BitAnd<&Self> for Foo {
type Output = Foo;
// Export under the unmangled name `bitand` so the symbol is easy to find in the emitted IR and assembly.
#[unsafe(no_mangle)]
fn bitand(self, other: &Self) -> Self::Output {
// Move the receiver into a separate mutable binding instead of declaring `mut self`.
let mut ret = self;
// Apply the in-place AND via `BitAndAssign<&Self>`.
ret &= other;
ret
}
}
Generated LLVM IR
define void @bitand(ptr dead_on_unwind noalias nofree noundef writable writeonly sret([16 x i8]) align 16 captures(none) dereferenceable(16) initializes((0, 16)) %_0, ptr dead_on_return noalias nofree noundef readonly align 16 captures(none) dereferenceable(16) %self, ptr noalias nofree noundef readonly align 16 captures(none) dereferenceable(16) %other) unnamed_addr {
start:
%ret.sroa.0.0.copyload = load i128, ptr %self, align 16
%_3.i = load i128, ptr %other, align 16
%0 = and i128 %_3.i, %ret.sroa.0.0.copyload
store i128 %0, ptr %_0, align 16
ret void
}
define void @bitand_assign(ptr noalias nofree noundef align 16 captures(none) dereferenceable(16) %self, ptr noalias nofree noundef readonly align 16 captures(none) dereferenceable(16) %other) unnamed_addr {
start:
%_3 = load i128, ptr %other, align 16
%0 = load i128, ptr %self, align 16
%1 = and i128 %0, %_3
store i128 %1, ptr %self, align 16
ret void
}
Generated amd64 assembly:
bitand:
mov rax, rdi
movaps xmm0, xmmword ptr [rdx]
andps xmm0, xmmword ptr [rsi]
movaps xmmword ptr [rdi], xmm0
ret
bitand_assign:
movaps xmm0, xmmword ptr [rdi]
andps xmm0, xmmword ptr [rsi]
movaps xmmword ptr [rdi], xmm0
ret
Observed on rustc 1.98.0-nightly (cb46fbb 2026-06-08)
When implementing an operator by delegating to its `Assign` version, two formulations that should be equivalent produce different assembly. Using `mut self` generates a redundant `memcpy` compared to using `let mut ret = self`.
Produces suboptimal assembly (Godbolt):
Generated LLVM IR
Generated amd64 assembly:
Produces optimal assembly (Godbolt):
Generated LLVM IR
Generated amd64 assembly:
Observed on rustc 1.98.0-nightly (cb46fbb 2026-06-08)