我正在尝试从设备数组的一部分创建一个纹理3D。
为此,以下是我的步骤:
- 分配设备数组
- 写入设备数组
- 创建CudaArray(3D)
- 将纹理绑定到CudaArray上
我这样做时不会出现编译器错误,但是当我运行cuda-memcheck并尝试从纹理中获取数据时失败了。
Invalid global read of size 8 .. Address 0x10dfaf3a0 is out of bounds
这就是我猜测我声明纹理数组错误的原因。下面是我访问纹理的方式:
tex3D(NoiseTextures[i],x,y,z)
我执行上述步骤的方法如下:
1. 分配设备数组
cudaMalloc((void **)&d_Noise, sqrSizeNoise*nNoise*sizeof(float));
2.编写设备数组
curandCreateGenerator(&gen,CURAND_RNG_PSEUDO_DEFAULT);
curandSetPseudoRandomGeneratorSeed(gen,Seed);
curandGenerateUniform(gen, d_Noise, sqrSizeNoise*nNoise);
curandDestroyGenerator(gen);
3+4. 创建Cuda数组并将其绑定到纹理(我猜错误出在这里)
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();//cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
cudaArray *d_cuArr;
cudaMalloc3DArray(&d_cuArr, &channelDesc, make_cudaExtent(SizeNoise,SizeNoise,SizeNoise), 0);
cudaMemcpy3DParms copyParams = {0};
//Loop for every separated Noise Texture (nNoise = 4)
for(int i = 0; i < nNoise; i++){
//initialize the textures
NoiseTextures[i] = texture<float, 3, cudaReadModeElementType>(1,cudaFilterModeLinear,cudaAddressModeWrap,channelDesc);
//Array creation
//+(sqrSizeNoise*i) is to separate the created Noise Array into smaller parts with the size of SizeNoise^3
copyParams.srcPtr = make_cudaPitchedPtr(d_Noise+(sqrSizeNoise*i), SizeNoise*sizeof(float), SizeNoise, SizeNoise);
copyParams.dstArray = d_cuArr;
copyParams.extent = make_cudaExtent(SizeNoise,SizeNoise,SizeNoise);
copyParams.kind = cudaMemcpyDeviceToDevice;
checkCudaErrors(cudaMemcpy3D(©Params));
//Array creation End
//new Bind
// set texture parameters
NoiseTextures[i].normalized = true; // access with normalized texture coordinates
NoiseTextures[i].filterMode = cudaFilterModeLinear; // linear interpolation
NoiseTextures[i].addressMode[0] = cudaAddressModeWrap; // wrap texture coordinates
NoiseTextures[i].addressMode[1] = cudaAddressModeWrap;
NoiseTextures[i].addressMode[2] = cudaAddressModeWrap;
// bind array to 3D texture
checkCudaErrors(cudaBindTextureToArray(NoiseTextures[i], d_cuArr, channelDesc));
//end Bind
}
cudaFreeArray(d_cuArr);
我已将这段代码片段粘贴到Pastebin上,以便更容易查看和区分颜色等信息。 http://pastebin.com/SM3dYd38 希望我已清楚地描述了我的问题。如果没有,请在评论中提出!
你能帮助我吗? 感谢阅读。
Cery
编辑: 这里有完整的代码,所以你可以在自己的机器上尝试它:
#include <helper_cuda.h>
#include <helper_functions.h>
#include <helper_cuda_gl.h>
#include <texture_types.h>
#include <cuda_runtime.h>
#include <curand.h>
static texture<float, 3, cudaReadModeElementType> NoiseTextures[4];//texture Array
float *d_NoiseTest;//Device Array with random floats
int SizeNoiseTest = 32;
int sqrSizeNoiseTest = 32768;
void CreateTexture();
__global__ void AccesTexture(texture<float, 3, cudaReadModeElementType>* NoiseTextures)
{
int test = tex3D(NoiseTextures[0],threadIdx.x,threadIdx.y,threadIdx.z);//by using this the error occurs
}
int
main(int argc, char **argv)
{
CreateTexture();
}
void CreateTexture()
{
//curand Random Generator (needs compiler link -lcurand)
curandGenerator_t gen;
cudaMalloc((void **)&d_NoiseTest, sqrSizeNoiseTest*4*sizeof(float));//Allocation of device Array
curandCreateGenerator(&gen,CURAND_RNG_PSEUDO_DEFAULT);
curandSetPseudoRandomGeneratorSeed(gen,1234ULL);
curandGenerateUniform(gen, d_NoiseTest, sqrSizeNoiseTest*4);//writing data to d_NoiseTest
curandDestroyGenerator(gen);
//cudaArray Descriptor
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
//cuda Array
cudaArray *d_cuArr;
cudaMalloc3DArray(&d_cuArr, &channelDesc, make_cudaExtent(SizeNoiseTest*sizeof(float),SizeNoiseTest,SizeNoiseTest), 0);
cudaMemcpy3DParms copyParams = {0};
//Loop for every separated Noise Texture (4 = 4)
for(int i = 0; i < 4; i++){
//initialize the textures
NoiseTextures[i] = texture<float, 3, cudaReadModeElementType>(1,cudaFilterModeLinear,cudaAddressModeWrap,channelDesc);
//Array creation
//+(sqrSizeNoise*i) is to separate the created Noise Array into smaller parts with the size of SizeNoise^3
copyParams.srcPtr = make_cudaPitchedPtr(d_NoiseTest+(sqrSizeNoiseTest*i), SizeNoiseTest*sizeof(float), SizeNoiseTest, SizeNoiseTest);
copyParams.dstArray = d_cuArr;
copyParams.extent = make_cudaExtent(SizeNoiseTest*sizeof(float),SizeNoiseTest,SizeNoiseTest);
copyParams.kind = cudaMemcpyDeviceToDevice;
checkCudaErrors(cudaMemcpy3D(©Params));
//Array creation End
//new Bind
// set texture parameters
NoiseTextures[i].normalized = true; // access with normalized texture coordinates
NoiseTextures[i].filterMode = cudaFilterModeLinear; // linear interpolation
NoiseTextures[i].addressMode[0] = cudaAddressModeWrap; // wrap texture coordinates
NoiseTextures[i].addressMode[1] = cudaAddressModeWrap;
NoiseTextures[i].addressMode[2] = cudaAddressModeWrap;
// bind array to 3D texture
checkCudaErrors(cudaBindTextureToArray(NoiseTextures[i], d_cuArr, channelDesc));
//end Bind
}
cudaFreeArray(d_cuArr);
AccesTexture<<<1,dim3(4,4,4)>>>(NoiseTextures);
}
你需要链接-lcurand。并且包含CUDA-6.0 / samples / common / inc。
现在我在这段代码中遇到了不同的错误。
代码=11(cudaErrorInvalidValue)“cudaMemcpy3D(©Params)”
尽管它与我的原始代码完全相同。 - 我开始感到完全困惑。
谢谢你的帮助。
make_cudaExtent(SizeNoise*sizeof(float),SizeNoise,SizeNoise);
。 - Kamil Czerskidim3(32,32,32)
不是任何当前的CUDA GPU的有效线程块配置。无论之前的代码如何,你都不可能运行该内核。我们不会将纹理引用作为内核的参数进行传递。它是一个静态实体。你只需要使用它。最好先学习基本的纹理知识。 - Robert CrovellacudaMemcpy3D
操作中涉及到cudaArray
,则用于3D复制参数的范围不是以字节为单位指定的,而是以元素为单位指定的。请参考文档。 - Robert Crovella