Skip to content

Pass vtable data by value #826

Merged
Darksonn merged 1 commit into
tokio-rs:master from
DaniPopes:data-byval
Apr 29, 2026
Merged

Pass vtable data by value #826
Darksonn merged 1 commit into
tokio-rs:master from
DaniPopes:data-byval

Conversation

@DaniPopes
Copy link
Copy Markdown
Contributor

The `drop`, `into_vec`, and `into_mut` vtable slots now take the loaded `*mut ()` (via `with_mut`) instead of `&[mut] AtomicPtr<()>`. This breaks the borrow-back into `Bytes`, letting LLVM infer `captures(none)` on the indirect call and elide the temporary `memcpy` when `Bytes` is passed by value. `clone` and `is_unique` keep `&AtomicPtr<()>` (called via `&self`; `promotable_*_clone` also needs the address for `compare_exchange`).

Minimal repro (https://godbolt.org/z/jz1437Enz):

Details
// src/lib.rs
use bytes::Bytes;

// Large by-value argument used by the repro: a 200-byte filler array plus a
// `Bytes` handle.
pub struct MyStuff {
    // Filler that bulks the struct up; the repro functions never read it.
    pub _stuff: [u8; 200],
    // The `Bytes` value carried inside the struct.
    pub bytes: Bytes,
}

// Repro entry point. Takes `val` by value and either forwards it to
// `lookup_blocking` (when `blocking` is true) or returns 1, in which case
// `val` goes out of scope at the end of the function without being used.
// `no_mangle` keeps the symbol name as `lookup`; `inline(never)` keeps the
// function as a standalone definition.
#[unsafe(no_mangle)]
#[inline(never)]
pub fn lookup(blocking: bool, val: MyStuff) -> u64 {
    if blocking {
        // Ownership of `val` moves into the callee on this path.
        return lookup_blocking(val);
    }
    // Non-blocking path: `val` is not forwarded anywhere.
    1
}

// Stand-in callee for the `blocking` branch of `lookup`: marked `#[cold]`,
// never inlined, ignores its argument and always returns 0.
#[cold]
#[inline(never)]
pub fn lookup_blocking(_val: MyStuff) -> u64 {
    0
}

Before:

; `lookup` before the change: `%val` is marked `captures(address)`, and the
; `bb1` path copies all 232 bytes of it into a stack temporary.
define noundef range(i64 0, 2) i64 @lookup(i1 noundef zeroext %blocking, ptr dead_on_return noalias noundef align 8 captures(address) dereferenceable(232) %val) unnamed_addr #1 {
start:
  ; Stack temporary that receives the copy of `%val` made in `bb1`.
  %_3 = alloca [232 x i8], align 8
  br i1 %blocking, label %bb1, label %bb3, !prof !15

bb3:                                              ; preds = %start
  tail call void @llvm.experimental.noalias.scope.decl(metadata !16)
  tail call void @llvm.experimental.noalias.scope.decl(metadata !19)
  tail call void @llvm.experimental.noalias.scope.decl(metadata !22)
  ; Load a pointer from offset 0 of `%val`, then a function pointer from
  ; offset 32 of what it points to (presumably the `Bytes` vtable and its
  ; `drop` slot; confirm against the crate).
  %_6.i.i.i = load ptr, ptr %val, align 8, !alias.scope !25, !nonnull !13, !align !14, !noundef !13
  %0 = getelementptr inbounds nuw i8, ptr %_6.i.i.i, i64 32
  %_2.i.i.i = load ptr, ptr %0, align 8, !noalias !25, !nonnull !13, !noundef !13
  ; Address of the field at offset 24 inside `%val`; the address itself (not
  ; the loaded value) becomes the first argument of the indirect call below.
  %_3.i.i.i = getelementptr inbounds nuw i8, ptr %val, i64 24
  %1 = getelementptr inbounds nuw i8, ptr %val, i64 8
  %_4.i.i.i = load ptr, ptr %1, align 8, !alias.scope !25, !noundef !13
  %2 = getelementptr inbounds nuw i8, ptr %val, i64 16
  %_5.i.i.i = load i64, ptr %2, align 8, !alias.scope !25, !noundef !13
  ; Indirect call that receives a pointer into `%val`.
  tail call void %_2.i.i.i(ptr noalias noundef nonnull align 8 dereferenceable(8) %_3.i.i.i, ptr noundef %_4.i.i.i, i64 noundef %_5.i.i.i) #5
  br label %bb4

bb1:                                              ; preds = %start
  ; Copy `%val` into the temporary `%_3`, then pass the temporary to
  ; `lookup_blocking`.
  call void @llvm.lifetime.start.p0(ptr nonnull %_3)
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(232) %_3, ptr noundef nonnull align 8 dereferenceable(232) %val, i64 232, i1 false)
  %3 = call noundef i64 @lookup_repro::lookup_blocking(ptr noalias noundef nonnull align 8 captures(address) dereferenceable(232) %_3) #6
  call void @llvm.lifetime.end.p0(ptr nonnull %_3)
  br label %bb4

bb4:                                              ; preds = %bb1, %bb3
  ; Result is 0 when coming from `bb1`, 1 when coming from `bb3`.
  %_0.sroa.0.0 = phi i64 [ 0, %bb1 ], [ 1, %bb3 ]
  ret i64 %_0.sroa.0.0
}

After:

; `lookup` after the change: `%val` is now `readonly` and `captures(none)`,
; and there is no stack temporary or `memcpy`.
define noundef range(i64 0, 2) i64 @lookup(i1 noundef zeroext %blocking, ptr dead_on_return noalias noundef readonly align 8 captures(none) dereferenceable(232) %val) unnamed_addr #1 {
start:
  br i1 %blocking, label %bb1, label %bb3, !prof !15

bb3:                                              ; preds = %start
  tail call void @llvm.experimental.noalias.scope.decl(metadata !16)
  tail call void @llvm.experimental.noalias.scope.decl(metadata !19)
  tail call void @llvm.experimental.noalias.scope.decl(metadata !22)
  ; The pointer stored at offset 24 of `%val` is loaded here, so the indirect
  ; call below receives the loaded value rather than an address inside `%val`.
  %_9.i.i.i = getelementptr inbounds nuw i8, ptr %val, i64 24
  %data.i.i.i = load ptr, ptr %_9.i.i.i, align 8, !alias.scope !25, !noundef !13
  ; Load a pointer from offset 0 of `%val`, then a function pointer from
  ; offset 32 of what it points to (presumably the `Bytes` vtable and its
  ; `drop` slot; confirm against the crate).
  %_6.i.i.i = load ptr, ptr %val, align 8, !alias.scope !25, !nonnull !13, !align !14, !noundef !13
  %0 = getelementptr inbounds nuw i8, ptr %_6.i.i.i, i64 32
  %_3.i.i.i = load ptr, ptr %0, align 8, !noalias !25, !nonnull !13, !noundef !13
  %1 = getelementptr inbounds nuw i8, ptr %val, i64 8
  %_4.i.i.i = load ptr, ptr %1, align 8, !alias.scope !25, !noundef !13
  %2 = getelementptr inbounds nuw i8, ptr %val, i64 16
  %_5.i.i.i = load i64, ptr %2, align 8, !alias.scope !25, !noundef !13
  ; Indirect call whose arguments are all values loaded from `%val`.
  tail call void %_3.i.i.i(ptr noundef %data.i.i.i, ptr noundef %_4.i.i.i, i64 noundef %_5.i.i.i) #3, !noalias !25
  br label %bb4

bb1:                                              ; preds = %start
  ; `%val` is passed straight through to `lookup_blocking` as a tail call.
  %3 = tail call noundef i64 @lookup_repro::lookup_blocking(ptr noalias noundef nonnull align 8 captures(address) dereferenceable(232) %val) #4
  br label %bb4

bb4:                                              ; preds = %bb1, %bb3
  ; Result is 0 when coming from `bb1`, 1 when coming from `bb3`.
  %_0.sroa.0.0 = phi i64 [ 0, %bb1 ], [ 1, %bb3 ]
  ret i64 %_0.sroa.0.0
}

The `drop`, `into_vec`, and `into_mut` vtable slots now take the loaded
`*mut ()` (via `with_mut`) instead of `&[mut] AtomicPtr<()>`. This breaks
the borrow-back into `Bytes`, letting LLVM infer `captures(none)` on the
indirect call and elide the temporary `memcpy` when `Bytes` is passed by
value. `clone` and `is_unique` keep `&AtomicPtr<()>` (called via `&self`;
`promotable_*_clone` also needs the address for `compare_exchange`).

Minimal repro (https://godbolt.org/z/jz1437Enz):

```rust
use bytes::Bytes;

// Large by-value argument used by the repro: a 200-byte filler array plus a
// `Bytes` handle.
pub struct MyStuff {
    // Filler that bulks the struct up; the repro functions never read it.
    pub _stuff: [u8; 200],
    // The `Bytes` value carried inside the struct.
    pub bytes: Bytes,
}

// Repro entry point. Takes `val` by value and either forwards it to
// `lookup_blocking` (when `blocking` is true) or returns 1, in which case
// `val` goes out of scope at the end of the function without being used.
// `no_mangle` keeps the symbol name as `lookup`; `inline(never)` keeps the
// function as a standalone definition.
#[unsafe(no_mangle)]
#[inline(never)]
pub fn lookup(blocking: bool, val: MyStuff) -> u64 {
    if blocking {
        // Ownership of `val` moves into the callee on this path.
        return lookup_blocking(val);
    }
    // Non-blocking path: `val` is not forwarded anywhere.
    1
}

// Stand-in callee for the `blocking` branch of `lookup`: marked `#[cold]`,
// never inlined, ignores its argument and always returns 0.
#[cold]
#[inline(never)]
pub fn lookup_blocking(_val: MyStuff) -> u64 {
    0
}
```
@Darksonn
Copy link
Copy Markdown
Member

I'm pretty sure we sometimes perform a compare-and-swap on these pointers, which we can't do if you pass them by value.

@DaniPopes
Copy link
Copy Markdown
Contributor Author

DaniPopes commented Apr 27, 2026

Yes, but only for clone, which is described in the description.

We're talking about swapping the AtomicPtr that's inside of Bytes, which is never shared unless Bytes itself is (i.e. not when it is held by &mut or owned). The swap only happens in Clone, when turning into a shared vtable.

Copy link
Copy Markdown
Member

@Darksonn Darksonn left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks.

@Darksonn Darksonn merged commit 245adff into tokio-rs:master Apr 29, 2026
18 checks passed
@DaniPopes DaniPopes deleted the data-byval branch April 29, 2026 11:09
@DaniPopes
Copy link
Copy Markdown
Contributor Author

Hey @Darksonn, any chance this could be released? It's a pretty minor change but it can cut down stack usage quite a bit in functions when using Bytes or structures containing it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants