I'm using c9d272a. I'm trying to transition from one nvexec::stream_context to another, but it segfaults. Here is the reproducer:
TEST(try, reproducer)
{
  ::nvexec::stream_context stream_ctx_0{};
  ::nvexec::stream_context stream_ctx_1{};

  auto snd = ::stdexec::schedule(stream_ctx_0.get_scheduler())
           | ::stdexec::then([=]() -> int {
               if (::nvexec::is_on_gpu())
                 return 1;
               else
                 return 0;
             })
           | ::stdexec::continues_on(stream_ctx_1.get_scheduler())
           | ::stdexec::then([=](const int val) -> int {
               if (::nvexec::is_on_gpu() && val == 1)
                 return 2;
               else
                 return 0;
             });

  const auto [result] = ::stdexec::sync_wait(std::move(snd)).value();
  ASSERT_EQ(result, 2);
}
Note that if I change to ::stdexec::continues_on(stream_ctx_0.get_scheduler()) (i.e. transitioning from stream_ctx_0 to itself), it works fine.
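For comparison, here is a condensed sketch of that working single-context variant; the lambda bodies are shortened to ternaries but are otherwise the same as the reproducer above, and only the continues_on target differs:

  // Same pipeline as the reproducer, but continuing on stream_ctx_0's
  // scheduler instead of stream_ctx_1's; this version completes and the
  // assertion passes.
  auto snd = ::stdexec::schedule(stream_ctx_0.get_scheduler())
           | ::stdexec::then([=]() -> int { return ::nvexec::is_on_gpu() ? 1 : 0; })
           | ::stdexec::continues_on(stream_ctx_0.get_scheduler())  // only change: was stream_ctx_1
           | ::stdexec::then([=](const int val) -> int {
               return (::nvexec::is_on_gpu() && val == 1) ? 2 : 0;
             });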
Here is the backtrace I get from cuda-gdb:
[----------] 1 test from try
[ RUN ] try.reproducer
[New Thread 0x7fff995fe000 (LWP 33576)]
[New Thread 0x7fff98dfd000 (LWP 33577)]
[New Thread 0x7fff79fff000 (LWP 33578)]
[Thread 0x7fff98dfd000 (LWP 33577) exited]
Thread 21 "tests_nvexec_ad" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fff995fe000 (LWP 33576)]
0x00007ffff274276b in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
(cuda-gdb) bt
#0 0x00007ffff274276b in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#1 0x00007ffff284214e in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#2 0x00007ffff27f97c9 in ?? () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#3 0x00007ffff27df6f0 in cuMemFreeAsync () from /usr/lib/x86_64-linux-gnu/libcuda.so.1
#4 0x00007ffff2016540 in ?? () from /usr/local/cuda/lib64/libcudart.so.12
#5 0x00007ffff207e05f in cudaFreeAsync () from /usr/local/cuda/lib64/libcudart.so.12
#6 0x000055555556ecb1 in nvexec::_strm::continuation_task_t<nvexec::_strm::_continues_on::operation_state_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_t, tests::nvexec::adaptors::try_reproducer_Test::TestBody()::{lambda()#1}>, nvexec::_strm::_sched_from::receiver_t<nvexec::_strm::continues_on_sender_t<nvexec::_strm::stream_scheduler, nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_t, tests::nvexec::adaptors::try_reproducer_Test::TestBody()::{lambda()#1}> >, nvexec::_strm::_then::receiver_t<4ul, nvexec::_strm::propagate_receiver_t<nvexec::_strm::_sync_wait::receiver_t<nvexec::_strm::then_sender_t<nvexec::_strm::schedule_from_sender_t<nvexec::_strm::stream_scheduler, nvexec::_strm::continues_on_sender_t<nvexec::_strm::stream_scheduler, nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_t, tests::nvexec::adaptors::try_reproducer_Test::TestBody()::{lambda()#1}> > >, tests::nvexec::adaptors::try_reproducer_Test::TestBody()::{lambda(int)#1}> > >, tests::nvexec::adaptors::try_reproducer_Test::TestBody()::{lambda(int)#1}> > >::__t::receiver_t, nvexec::variant_t<cuda::std::__4::tuple<nvexec::_strm::set_noop>, cuda::std::__4::tuple<stdexec::__rcvrs::set_value_t, int>, cuda::std::__4::tuple<stdexec::__rcvrs::set_error_t, cudaError>, cuda::std::__4::tuple<stdexec::__rcvrs::set_error_t, std::__exception_ptr::exception_ptr> > >::continuation_task_t(nvexec::_strm::_continues_on::operation_state_t<nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_t, tests::nvexec::adaptors::try_reproducer_Test::TestBody()::{lambda()#1}>, nvexec::_strm::_sched_from::receiver_t<nvexec::_strm::continues_on_sender_t<nvexec::_strm::stream_scheduler, nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_t, tests::nvexec::adaptors::try_reproducer_Test::TestBody()::{lambda()#1}> >, nvexec::_strm::_then::receiver_t<4ul, nvexec::_strm::propagate_receiver_t<nvexec::_strm::_sync_wait::receiver_t<nvexec::_strm::then_sender_t<nvexec::_strm::schedule_from_sender_t<nvexec::_strm::stream_scheduler, nvexec::_strm::continues_on_sender_t<nvexec::_strm::stream_scheduler, nvexec::_strm::then_sender_t<nvexec::_strm::stream_scheduler::sender_t, tests::nvexec::adaptors::try_reproducer_Test::TestBody()::{lambda()#1}> > >, tests::nvexec::adaptors::try_reproducer_Test::TestBody()::{lambda(int)#1}> > >, tests::nvexec::adaptors::try_reproducer_Test::TestBody()::{lambda(int)#1}> > >::__t::receiver_t, nvexec::variant_t<cuda::std::__4::tuple<nvexec::_strm::set_noop>, cuda::std::__4::tuple<stdexec::__rcvrs::set_value_t, int>, cuda::std::__4::tuple<stdexec::__rcvrs::set_error_t, cudaError>, cuda::std::__4::tuple<stdexec::__rcvrs::set_error_t, std::__exception_ptr::exception_ptr> >*, CUstream_st*, std::pmr::memory_resource*)::{lambda(nvexec::_strm::queue::task_base_t*)#2}::__invoke(nvexec::_strm::queue::task_base_t*) ()
I am on an AMPERE86 GPU with CUDA 12.8, compiling with:
Ubuntu clang version 20.1.2 (++20250331083337+bc65196c0919-1 exp120250331203353.96)
Any ideas? I am assuming that my code is legit, but I might be wrong. Thanks!