当使用zip_iterator处理元组并且有自定义比较谓词时,CUDA Thrust中的sort_by_key如何排序键值对?

5
我在这里查看了很多类似的问题,虽然有一些小的变化。我正在尝试使用zip_iterator作为复合键对值进行排序。
具体来说,我有以下函数:
/**
 * Sorts `values` by the composite key (primaryKey[i], secondaryKey[i]).
 *
 * The two key arrays are zipped into a single tuple-valued key sequence and
 * handed to thrust::sort_by_key together with ZipComparator, which decides
 * the ordering of two keys.
 *
 * @param primaryKey     first key component, numberOfPoints elements
 * @param secondaryKey   second key component, numberOfPoints elements
 * @param values         values permuted alongside the keys
 * @param numberOfPoints number of key/value pairs
 *
 * NOTE(review): the pointers are wrapped with device_pointer_cast, so they
 * are presumably device allocations -- confirm against the caller.
 */
void thrustSort(
    unsigned int * primaryKey,
    float * secondaryKey,
    unsigned int * values,
    unsigned int numberOfPoints)
{
    // Wrap the raw pointers so Thrust can use them as iterators.
    thrust::device_ptr<unsigned int> dev_ptr_pkey = thrust::device_pointer_cast(primaryKey);
    thrust::device_ptr<float> dev_ptr_skey = thrust::device_pointer_cast(secondaryKey);
    thrust::device_ptr<unsigned int> dev_ptr_values = thrust::device_pointer_cast(values);

    // Tuple of iterators marking the start of the zipped key sequence.
    // The factory functions deduce their template arguments from the call.
    thrust::tuple<thrust::device_ptr<unsigned int>, thrust::device_ptr<float> > keytup_begin =
        thrust::make_tuple(dev_ptr_pkey, dev_ptr_skey);

    thrust::zip_iterator<thrust::tuple<thrust::device_ptr<unsigned int>, thrust::device_ptr<float> > > first =
        thrust::make_zip_iterator(keytup_begin);

    thrust::sort_by_key(first, first + numberOfPoints, dev_ptr_values, ZipComparator());
}
以及以下自定义谓词:
// Shorthand names for the device-pointer types of the two key arrays.
typedef thrust::device_ptr<unsigned int> tdp_uint ;
typedef thrust::device_ptr<float> tdp_float ;
// Tuple whose elements are device pointers (iterators), not the
// unsigned int / float values those pointers refer to.
typedef thrust::tuple<tdp_uint, tdp_float> tdp_uif_tuple ;

// Comparison predicate passed to thrust::sort_by_key by thrustSort.
// NOTE(review): the parameters are declared as tuples of device pointers.
// The answer further down in this thread states that the comparator must
// take const thrust::tuple<unsigned int, float>& instead, and attributes the
// reported "no matching constructor thrust::device_ptr" errors to this
// mismatch.
struct ZipComparator
{
    __host__ __device__
    inline bool operator() (const tdp_uif_tuple &a, const tdp_uif_tuple &b)
    {
        // Order by the first tuple member; on equality fall back to the rest.
        // NOTE(review): `head` / `tail` look like internal members of the
        // tuple implementation rather than its public accessors -- confirm.
        if(a.head < b.head) return true;
        if(a.head == b.head) return a.tail < b.tail;
        return false;

    }
};

我收到的错误信息如下:
错误1:error: 没有与参数列表匹配的构造函数实例 "thrust::device_ptr<T>::device_ptr [with T=unsigned int]" C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.0\include\thrust\detail\tuple.inl 309 1 ---
错误2:error: 没有与参数列表匹配的构造函数实例 "thrust::device_ptr<T>::device_ptr [with T=float]" C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v4.0\include\thrust\detail\tuple.inl 401 1 ---
你知道是什么原因导致了这个问题吗?应该怎样编写一个真正可用的谓词呢?
提前感谢, Nathan

你能否发布产生这些错误消息的实际代码?看起来在复制/粘贴过程中有一些内容丢失了,缺少这些内容会使调试变得困难。 - Jared Hoberock
从技术上讲,仅凭这段代码就足以产生编译错误,我很快会上传一个“运行”代码示例。 - Nathan Dortman
很抱歉,我未能上传一个可运行的示例......我负担着维护一个由不注重编码风格的人编写的庞大代码库,因此提取相关代码似乎是不可能的。 无论如何,我已经接受下面的答案解决了我所有的问题,所以我希望这已经足够了。 谢谢! - Nathan Dortman
2个回答

2

比较器需要的参数类型为const thrust::tuple<unsigned int, float>&。而你定义的const tdp_uif_tuple&类型会被展开为const thrust::tuple<thrust::device_ptr<unsigned int>, thrust::device_ptr<float> >&

以下代码在我的编译环境中可以通过:

// Lexicographic "less than" for (unsigned int, float) composite keys.
//
// The arguments are tuples of the key *values*, matching the value type of a
// zip_iterator built over an unsigned int sequence and a float sequence.
// Elements are read through the public thrust::get<N> accessor rather than
// the tuple's internal members, and operator() is const so the functor can
// be invoked through a const object.
struct ZipComparator
{
    __host__ __device__
    inline bool operator() (const thrust::tuple<unsigned int, float> &a,
                            const thrust::tuple<unsigned int, float> &b) const
    {
        const unsigned int a_primary = thrust::get<0>(a);
        const unsigned int b_primary = thrust::get<0>(b);

        // The primary key decides unless it ties.
        if (a_primary < b_primary) return true;
        // On a tie, the secondary (float) key decides.
        if (a_primary == b_primary) return thrust::get<1>(a) < thrust::get<1>(b);
        return false;
    }
};

希望这对您也有帮助:) http://code.google.com/p/thrust/wiki/QuickStartGuide#zip_iterator 上有关于 zip 迭代器的更多详细信息。
不是必需的,但如果您想要简化这些模板的长度,您可以这样做:
// Sorts `values` by the composite key (primaryKey[i], secondaryKey[i]),
// using ZipComparator to order two keys. Relies on the tdp_uint / tdp_float
// typedefs for the device-pointer types.
void thrustSort(
    unsigned int * primaryKey,
    float * secondaryKey,
    unsigned int * values,
    unsigned int numberOfPoints)
{
    // Wrap each raw pointer in its device_ptr type.
    const tdp_uint pkeyBegin(primaryKey);
    const tdp_float skeyBegin(secondaryKey);
    const tdp_uint valuesBegin(values);

    // Zip the two key sequences; template arguments of the factory
    // functions are deduced from their arguments.
    typedef thrust::zip_iterator<thrust::tuple<tdp_uint, tdp_float> > KeyIterator;
    const KeyIterator keysBegin =
        thrust::make_zip_iterator(thrust::make_tuple(pkeyBegin, skeyBegin));
    const KeyIterator keysEnd = keysBegin + numberOfPoints;

    thrust::sort_by_key(keysBegin, keysEnd, valuesBegin, ZipComparator());
}

很多模板参数可以从参数中推断出来。

1
这是一个完整的示例,演示如何在使用zip_iterator和自定义比较运算符处理tuple键时使用sort_by_key
#include <cstdio>

#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/sort.h>
#include <thrust/tuple.h>

#include "Utilities.cuh"

// --- Defining tuple type
// Value type of the zipped (key1, key2) sequence being sorted.
typedef thrust::tuple<int, int> Tuple;

/**************************/
/* TUPLE ORDERING FUNCTOR */
/**************************/
// Lexicographic "less than" on (int, int) tuples: compare the first
// elements, and only when they are equal compare the second elements.
// Elements are read with the non-member thrust::get<N> accessor, and
// operator() is const so the functor can be invoked through a const object.
struct TupleComp
{
    __host__ __device__ bool operator()(const Tuple& t1, const Tuple& t2) const
    {
        const int first1 = thrust::get<0>(t1);
        const int first2 = thrust::get<0>(t2);

        if (first1 < first2)
            return true;
        if (first1 > first2)
            return false;
        // First elements tie: the second element decides.
        return thrust::get<1>(t1) < thrust::get<1>(t2);
    }
};

/********/
/* MAIN */
/********/
// Demonstrates thrust::sort_by_key with a zip_iterator over two int key
// arrays and a custom tuple comparator (TupleComp). Prints the key/value
// triples before and after sorting.
int main()
{
    const int N = 8;

    // --- Keys and values on the host: allocation and definition
    int h_keys1[N]      = { 1, 3, 3, 3, 2, 3, 2, 1 };
    int h_keys2[N]      = { 1, 5, 3, 8, 2, 8, 1, 1 };
    // Float literals carry the f suffix so no double constants are involved.
    float h_values[N]   = { 0.3f, 5.1f, 3.2f, -0.08f, 2.1f, 5.2f, 1.1f, 0.01f };

    printf("\n\n");
    printf("Original\n");
    for (int i = 0; i < N; i++) {
        printf("%i %i %f\n", h_keys1[i], h_keys2[i], h_values[i]);
    }

    // --- Keys and values on the device: allocation
    int *d_keys1;       gpuErrchk(cudaMalloc(&d_keys1, N * sizeof(int)));
    int *d_keys2;       gpuErrchk(cudaMalloc(&d_keys2, N * sizeof(int)));
    float *d_values;    gpuErrchk(cudaMalloc(&d_values, N * sizeof(float)));

    // --- Keys and values: host -> device
    gpuErrchk(cudaMemcpy(d_keys1, h_keys1, N * sizeof(int), cudaMemcpyHostToDevice));
    gpuErrchk(cudaMemcpy(d_keys2, h_keys2, N * sizeof(int), cudaMemcpyHostToDevice));
    gpuErrchk(cudaMemcpy(d_values, h_values, N * sizeof(float), cudaMemcpyHostToDevice));

    // --- From raw pointers to device_ptr
    thrust::device_ptr<int> dev_ptr_keys1 = thrust::device_pointer_cast(d_keys1);
    thrust::device_ptr<int> dev_ptr_keys2 = thrust::device_pointer_cast(d_keys2);
    thrust::device_ptr<float> dev_ptr_values = thrust::device_pointer_cast(d_values);

    // --- Zip the two key arrays into one sequence of (key1, key2) tuples
    auto begin_keys = thrust::make_zip_iterator(thrust::make_tuple(dev_ptr_keys1, dev_ptr_keys2));
    auto end_keys = thrust::make_zip_iterator(thrust::make_tuple(dev_ptr_keys1 + N, dev_ptr_keys2 + N));

    // --- Sort in place: d_keys1, d_keys2 and d_values hold the result
    thrust::sort_by_key(begin_keys, end_keys, dev_ptr_values, TupleComp());

    // --- Results: device -> host (N is a compile-time constant, so plain
    //     arrays suffice and nothing has to be freed on the host side)
    int h_keys1_output[N];
    int h_keys2_output[N];
    float h_values_output[N];

    gpuErrchk(cudaMemcpy(h_keys1_output, d_keys1, N * sizeof(int), cudaMemcpyDeviceToHost));
    gpuErrchk(cudaMemcpy(h_keys2_output, d_keys2, N * sizeof(int), cudaMemcpyDeviceToHost));
    gpuErrchk(cudaMemcpy(h_values_output, d_values, N * sizeof(float), cudaMemcpyDeviceToHost));

    printf("\n\n");
    printf("Ordered\n");
    for (int i = 0; i < N; i++) {
        printf("%i %i %f\n", h_keys1_output[i], h_keys2_output[i], h_values_output[i]);
    }

    // --- Release device memory
    gpuErrchk(cudaFree(d_keys1));
    gpuErrchk(cudaFree(d_keys2));
    gpuErrchk(cudaFree(d_values));

    return 0;
}

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接