假设我有一个与设备 i 相关联的活动 CUDA 上下文,现在我调用 cudaSetDevice(i),会发生什么?
- 什么也不会发生?
- 主上下文(primary context)替换上下文栈的栈顶?
- 主上下文(primary context)被压入上下文栈?
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cassert>
#include <cstdlib>
#include <iostream>
int main()
{
CUcontext ctx1, primary;
cuInit(0);
auto status = cuCtxCreate(&ctx1, 0, 0);
assert (status == (CUresult) cudaSuccess);
cuCtxPushCurrent(ctx1);
status = cudaSetDevice(0);
assert (status == cudaSuccess);
void* ptr1;
void* ptr2;
cudaMalloc(&ptr1, 1024);
assert (status == cudaSuccess);
cuCtxGetCurrent(&primary);
assert (status == (CUresult) cudaSuccess);
assert(primary != ctx1);
status = cuCtxPushCurrent(ctx1);
assert (status == (CUresult) cudaSuccess);
cudaMalloc(&ptr2, 1024);
assert (status == (CUresult) cudaSuccess);
cudaSetDevice(0);
assert (status == (CUresult) cudaSuccess);
int i = 0;
while (true) {
status = cuCtxPopCurrent(&primary);
if (status != (CUresult) cudaSuccess) { break; }
std::cout << "Next context on stack (" << i++ << ") is " << (void*) primary << '\n';
}
}
我得到了以下输出:
context ctx1 is 0x563ec6225e30
primary context is 0x563ec61f5490
Next context on stack (0) is 0x563ec61f5490
Next context on stack (1) is 0x563ec61f5490
Next context on stack (2) is 0x563ec6225e30
看起来,它的行为有时是替换栈顶,有时却是压栈。
发生了什么?
如果我想绕开 cudaSetDevice() 的这种行为,我是否可以把代码改成执行以下步骤:1. 查看当前上下文。2. 将其另行保存。3. 使用运行时 API 调用 cudaSetDevice() 切换到要使用的设备。4. 完成我的工作。5. 调用 cuCtxSetCurrent() 恢复为我保存的上下文。 - einpoklum