Skip to content

codegen: Unnecessary memcpy when returning a mutated mut self #157685

@Filiprogrammer

Description

@Filiprogrammer

When implementing an operator by delegating to its `*Assign` counterpart, two formulations that should be equivalent produce different assembly. Taking the receiver as `mut self` generates a redundant memcpy compared to rebinding it with `let mut ret = self`.

Produces suboptimal assembly (Godbolt):

use std::ops::{BitAnd, BitAndAssign};

/// Newtype over a one-element `u128` array; the operand type for both operator impls below.
struct Foo([u128; 1]);

// In-place `&=` for `Foo` with a borrowed right-hand side.
impl BitAndAssign<&Self> for Foo {
    /// ANDs the single `u128` element of `other` into `self`.
    ///
    /// `no_mangle` keeps the symbol name as plain `bitand_assign`, which is how
    /// it appears in the IR and assembly listings of this report.
    #[unsafe(no_mangle)]
    fn bitand_assign(&mut self, other: &Self) {
        self.0[0] &= other.0[0];
    }
}

// By-value `&` for `Foo`, implemented by delegating to the `&=` impl.
impl BitAnd<&Self> for Foo {
    type Output = Foo;
    /// Takes the receiver by value through a `mut self` binding, mutates it
    /// in place with `&=`, and returns it.
    ///
    /// This is the formulation the report shows compiling to an extra memcpy
    /// from `self` into the return slot. `no_mangle` keeps the symbol name as
    /// plain `bitand`.
    #[unsafe(no_mangle)]
    fn bitand(mut self, other: &Self) -> Self::Output {
        // Delegates to `bitand_assign`, operating directly on the `self` binding.
        self &= other;
        self
    }
}

Generated LLVM IR

; `bitand` as generated for the `mut self` variant.
; %_0 is the sret return slot; %self and %other are the two operands, each passed by pointer.
define void @bitand(ptr dead_on_unwind noalias nofree noundef writable writeonly sret([16 x i8]) align 16 captures(none) dereferenceable(16) initializes((0, 16)) %_0, ptr dead_on_return noalias nofree noundef align 16 captures(none) dereferenceable(16) %self, ptr noalias nofree noundef readonly align 16 captures(none) dereferenceable(16) %other) unnamed_addr {
start:
  tail call void @llvm.experimental.noalias.scope.decl(metadata !29)
  tail call void @llvm.experimental.noalias.scope.decl(metadata !33)
  %_3.i = load i128, ptr %other, align 16
  %0 = load i128, ptr %self, align 16
  %1 = and i128 %0, %_3.i
  ; The AND result is first written back through the %self argument...
  store i128 %1, ptr %self, align 16
  ; ...and then all 16 bytes are copied from %self into the return slot %_0.
  ; This store + memcpy pair is what the report calls redundant.
  tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(16) %_0, ptr noundef nonnull align 16 dereferenceable(16) %self, i64 16, i1 false)
  ret void
}

; `bitand_assign`: loads both i128 operands, ANDs them, and stores the result back through %self.
define void @bitand_assign(ptr noalias nofree noundef align 16 captures(none) dereferenceable(16) %self, ptr noalias nofree noundef readonly align 16 captures(none) dereferenceable(16) %other) unnamed_addr {
start:
  %_3 = load i128, ptr %other, align 16
  %0 = load i128, ptr %self, align 16
  %1 = and i128 %0, %_3
  store i128 %1, ptr %self, align 16
  ret void
}

declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #2

declare void @llvm.experimental.noalias.scope.decl(metadata) #3

Generated amd64 assembly:

; `bitand` for the `mut self` variant.
; Per the inline notes below, [rsi] is `self` and [rdi] is the output slot;
; [rdx] is then presumably `other` (matches the IR parameter order -- confirm).
bitand:
        mov     rax, rdi
        movaps  xmm0, xmmword ptr [rsi]
        andps   xmm0, xmmword ptr [rdx]
        movaps  xmmword ptr [rsi], xmm0   ; store to self
        movaps  xmm0, xmmword ptr [rsi]   ; load from self
        movaps  xmmword ptr [rdi], xmm0   ; store to output (should have been direct)
        ret

; `bitand_assign`: loads 16 bytes from [rdi], ANDs them with the 16 bytes at [rsi],
; and stores the result back to [rdi] (the in-place update of `self` seen in the IR).
bitand_assign:
        movaps  xmm0, xmmword ptr [rdi]
        andps   xmm0, xmmword ptr [rsi]
        movaps  xmmword ptr [rdi], xmm0
        ret

Produces optimal assembly (Godbolt):

use std::ops::{BitAnd, BitAndAssign};

/// Newtype over a one-element `u128` array; the operand type for both operator impls below.
struct Foo([u128; 1]);

// In-place `&=` for `Foo` with a borrowed right-hand side.
impl BitAndAssign<&Self> for Foo {
    /// ANDs the single `u128` element of `other` into `self`.
    ///
    /// `no_mangle` keeps the symbol name as plain `bitand_assign`, which is how
    /// it appears in the IR and assembly listings of this report.
    #[unsafe(no_mangle)]
    fn bitand_assign(&mut self, other: &Self) {
        self.0[0] &= other.0[0];
    }
}

// By-value `&` for `Foo`, implemented by delegating to the `&=` impl.
impl BitAnd<&Self> for Foo {
    type Output = Foo;
    /// Moves `self` into a fresh mutable local, applies `&=` to that local,
    /// and returns it.
    ///
    /// This is the formulation the report shows compiling without the extra
    /// memcpy. `no_mangle` keeps the symbol name as plain `bitand`.
    #[unsafe(no_mangle)]
    fn bitand(self, other: &Self) -> Self::Output {
        // Rebinding instead of `mut self` is the only source-level difference
        // from the suboptimal variant.
        let mut ret = self;
        ret &= other;
        ret
    }
}

Generated LLVM IR

; `bitand` as generated for the `let mut ret = self` variant.
; Here %self is marked `readonly`: it is only loaded from, never written.
define void @bitand(ptr dead_on_unwind noalias nofree noundef writable writeonly sret([16 x i8]) align 16 captures(none) dereferenceable(16) initializes((0, 16)) %_0, ptr dead_on_return noalias nofree noundef readonly align 16 captures(none) dereferenceable(16) %self, ptr noalias nofree noundef readonly align 16 captures(none) dereferenceable(16) %other) unnamed_addr {
start:
  %ret.sroa.0.0.copyload = load i128, ptr %self, align 16
  %_3.i = load i128, ptr %other, align 16
  %0 = and i128 %_3.i, %ret.sroa.0.0.copyload
  ; The AND result goes straight into the return slot %_0; there is no memcpy.
  store i128 %0, ptr %_0, align 16
  ret void
}

; `bitand_assign`: loads both i128 operands, ANDs them, and stores the result back through %self.
define void @bitand_assign(ptr noalias nofree noundef align 16 captures(none) dereferenceable(16) %self, ptr noalias nofree noundef readonly align 16 captures(none) dereferenceable(16) %other) unnamed_addr {
start:
  %_3 = load i128, ptr %other, align 16
  %0 = load i128, ptr %self, align 16
  %1 = and i128 %0, %_3
  store i128 %1, ptr %self, align 16
  ret void
}

Generated amd64 assembly:

; `bitand` for the `let mut ret = self` variant: the AND result is stored exactly once,
; directly to [rdi] (presumably the output slot, as in the IR's store to %_0 -- confirm).
bitand:
        mov     rax, rdi
        movaps  xmm0, xmmword ptr [rdx]
        andps   xmm0, xmmword ptr [rsi]
        movaps  xmmword ptr [rdi], xmm0
        ret

; `bitand_assign`: loads 16 bytes from [rdi], ANDs them with the 16 bytes at [rsi],
; and stores the result back to [rdi] (the in-place update of `self` seen in the IR).
bitand_assign:
        movaps  xmm0, xmmword ptr [rdi]
        andps   xmm0, xmmword ptr [rsi]
        movaps  xmmword ptr [rdi], xmm0
        ret

Observed on rustc 1.98.0-nightly (cb46fbb 2026-06-08)

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-codegen (Area: Code generation) · C-bug (Category: This is a bug.) · C-optimization (Category: An issue highlighting optimization opportunities or PRs implementing such) · needs-triage (This issue may need triage. Remove it if it has been sufficiently triaged.)

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions