pybind11:如何在Python和C++之间传递MPI通信器

4
我有一个C++类,打算通过Python的mpi4py接口调用它,使每个节点(进程)都能创建该类的一个实例。在C++这边,我使用的是Open MPI库(通过homebrew安装)和pybind11。

C++类如下:

#include <pybind11/pybind11.h>
#include <iostream>
#include <chrono>
#include <thread>
#include <vector>
#include <mpi.h>
// #define PyMPI_HAVE_MPI_Message 1
// #include <mpi4py/mpi4py.h>


namespace py = pybind11;

/// Toy computation that scales its input and prints a per-rank test vector.
///
/// The communicator defaults to MPI_COMM_WORLD so that Init() is well defined
/// even when set_comm() has not been called yet.
class SomeComputation
{
    float multiplier;
    std::vector<int> test;
    // Default-initialised on purpose: previously this member was indeterminate
    // until set_comm() ran, so Init() could pass a garbage handle to MPI.
    MPI_Comm comm_ = MPI_COMM_WORLD;

public:
    /// Fills `test` with ten copies of this process's rank within `comm_`.
    void Init()
    {
        int rank = 0;
        MPI_Comm_rank(comm_, &rank);
        test.assign(10, rank);
    }

    /// Replaces the communicator used by subsequent Init() calls.
    void set_comm(MPI_Comm comm){
        this->comm_ = comm;
    }

    /// @param multiplier_ factor applied to every compute() input.
    SomeComputation(float multiplier_) : multiplier(multiplier_){}
    ~SomeComputation() { std::cout << "Destructor Called!\n"; }


    /// Sleeps for `int(input) * 10` milliseconds, prints the contents of
    /// `test` on one line, and returns `multiplier * input`.
    float compute(float input)
    {
        std::this_thread::sleep_for(std::chrono::milliseconds(static_cast<int>(input) * 10));
        // Walk the elements actually present: before Init() the vector is
        // empty, and indexing positions 0..9 would read out of bounds.
        for (const int value : test)
        {
            std::cout << value << " ";
        }
        std::cout << std::endl;
        return multiplier * input;
    }
};

// Python bindings: exposes SomeComputation to Python as `Cpp_computation`
// inside the extension module `module_name`.
PYBIND11_MODULE(module_name, handle)
{
    py::class_<SomeComputation> computation(handle, "Cpp_computation");

    // Constructor argument types are given as template arguments.
    computation.def(py::init<float>());

    // NOTE(review): per the compiler output quoted in this post, this is the
    // binding that fails to build against Open MPI, where MPI_Comm is a
    // pointer to the incomplete type `ompi_communicator_t`.
    computation.def("set_comm", &SomeComputation::set_comm);

    computation.def("compute", &SomeComputation::compute);
    computation.def("cpp_init", &SomeComputation::Init);
}

下面是Python接口代码,它在每个进程中创建上述C++类的实例:

from build.module_name import * 
import time

from mpi4py import MPI


# Look up this process's rank in the world communicator.
comm = MPI.COMM_WORLD
rank = comm.Get_rank()


# Build the C++ object with multiplier 44.0 and run its C++-side setup.
# NOTE(review): the communicator is never handed to the C++ object in this
# script; set_comm() is not called anywhere below.
m = Cpp_computation(44.0)
m.cpp_init()

# Call compute() with 0..4, pausing one second after each call.
for step in range(5):
    print(m.compute(step))
    time.sleep(1)

我已经尝试过“使用pybind11共享MPI通信器”这个问题中的做法,但卡在了一个冗长且没什么帮助的编译错误上(完整信息如下):

[...]
/Users/purusharth/Documents/hiwi/pympicontroller/pybind11/include/pybind11/pybind11.h:1398:22:   required from 'pybind11::class_<type_, options>& pybind11::class_<type_, options>::def(const char*, Func&&, const Extra& ...) [with Func = void (SomeComputation::*)(ompi_communicator_t*); Extra = {}; type_ = SomeComputation; options = {}]'
/Users/purusharth/Documents/hiwi/pympicontroller/main.cpp:79:7:   required from here
/opt/homebrew/Cellar/gcc/11.2.0_3/include/c++/11/type_traits:1372:38: error: invalid use of incomplete type 'struct ompi_communicator_t'
 1372 |     : public integral_constant<bool, __is_base_of(_Base, _Derived)>
      |                                      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from /Users/purusharth/Documents/hiwi/pympicontroller/main.cpp:6:
/opt/homebrew/Cellar/open-mpi/4.1.2/include/mpi.h:419:16: note: forward declaration of 'struct ompi_communicator_t'
  419 | typedef struct ompi_communicator_t *MPI_Comm;
      |                ^~~~~~~~~~~~~~~~~~~

[...]

/Users/purusharth/Documents/hiwi/pympicontroller/pybind11/include/pybind11/pybind11.h:1398:22:   required from 'pybind11::class_<type_, options>& pybind11::class_<type_, options>::def(const char*, Func&&, const Extra& ...) [with Func = void (SomeComputation::*)(ompi_communicator_t*); Extra = {}; type_ = SomeComputation; options = {}]'
/Users/purusharth/Documents/hiwi/pympicontroller/main.cpp:79:7:   required from here
/Users/purusharth/Documents/hiwi/pympicontroller/pybind11/include/pybind11/detail/descr.h:40:19: error: invalid use of incomplete type 'struct ompi_communicator_t'
   40 |         return {{&typeid(Ts)..., nullptr}};
      |                   ^~~~~~~~~~
In file included from /Users/purusharth/Documents/hiwi/pympicontroller/main.cpp:6:
/opt/homebrew/Cellar/open-mpi/4.1.2/include/mpi.h:419:16: note: forward declaration of 'struct ompi_communicator_t'
  419 | typedef struct ompi_communicator_t *MPI_Comm;
      |                ^~~~~~~~~~~~~~~~~~~

[...]

                 from /Users/purusharth/Documents/hiwi/pympicontroller/main.cpp:1:
/Users/purusharth/Documents/hiwi/pympicontroller/pybind11/include/pybind11/detail/descr.h:40:42: error: could not convert '{{<expression error>, nullptr}}' from '<brace-enclosed initializer list>' to 'std::array<const std::type_info*, 3>'
   40 |         return {{&typeid(Ts)..., nullptr}};
      |                                          ^
      |                                          |
      |                                          <brace-enclosed initializer list>

[...]

In file included from /Users/purusharth/Documents/hiwi/pympicontroller/main.cpp:1:
/Users/purusharth/Documents/hiwi/pympicontroller/pybind11/include/pybind11/pybind11.h: In instantiation of 'void pybind11::cpp_function::initialize(Func&&, Return (*)(Args ...), const Extra& ...) [with Func = pybind11::cpp_function::cpp_function<void, SomeComputation, ompi_communicator_t*, pybind11::name, pybind11::is_method, pybind11::sibling>(void (SomeComputation::*)(ompi_communicator_t*), const pybind11::name&, const pybind11::is_method&, const pybind11::sibling&)::<lambda(SomeComputation*, ompi_communicator_t*)>; Return = void; Args = {SomeComputation*, ompi_communicator_t*}; Extra = {pybind11::name, pybind11::is_method, pybind11::sibling}]':
[..]
/Users/purusharth/Documents/hiwi/pympicontroller/pybind11/include/pybind11/pybind11.h:1398:22:   required from 'pybind11::class_<type_, options>& pybind11::class_<type_, options>::def(const char*, Func&&, const Extra& ...) [with Func = void (SomeComputation::*)(ompi_communicator_t*); Extra = {}; type_ = SomeComputation; options = {}]'
/Users/purusharth/Documents/hiwi/pympicontroller/main.cpp:79:7:   required from here
/Users/purusharth/Documents/hiwi/pympicontroller/pybind11/include/pybind11/pybind11.h:266:73:   in 'constexpr' expansion of 'pybind11::detail::descr<18, SomeComputation, ompi_communicator_t>::types()'
/Users/purusharth/Documents/hiwi/pympicontroller/pybind11/include/pybind11/pybind11.h:266:39: error: 'constexpr' call flows off the end of the function
  266 |         PYBIND11_DESCR_CONSTEXPR auto types = decltype(signature)::types();
      |                                       ^~~~~

错误指向 .def("set_comm", &SomeComputation::set_comm) 这一行。这些错误的原因是什么,应该如何解决?
更新:我已在下面的回答中给出了一种使用自定义类型转换器的解决方案。但这是唯一的方法吗?

从我所能解读的来看,MPI_Comm只被声明而没有被定义,因此你应该通过指针(MPI_Comm* comm_;)持有它,而不是直接按值持有。请参见https://dev59.com/v2ox5IYBdhLWcg3wyHPc - unddoch
当然,那可能不是真正的问题。在这种情况下,你能否尝试发布完整的main.cpp文件?错误引用了第79行,但你的代码比这还要短。 - unddoch
据我所知,我已经包含了所有mpi所需的头文件。事实上,如果我删除mpi通信器的设置器,编译就可以顺利完成,不会产生任何错误。 - Jarwin
@unddoch 我在粘贴代码之前删除了一些注释,第79行最初指向set_comm的定义。(问题已更新) - Jarwin
struct ompi_communicator_t 看起来是在 "open-mpi/4.1.2/include/ompi/communicator/communicator.h" 中声明的。如果将 #include<ompi/communicator/communicator.h> 添加到C++类定义文件中会发生什么? - outis
显示剩余4条评论
2个回答

0
基于这个答案:https://dev59.com/2anka4cB1Zd3GeqPTcQW#62449190 我能够通过创建自定义MPI类型转换器来传输MPI通信器。
#include <pybind11/pybind11.h>

#include <iostream>
#include <stdexcept>
#include <vector>

#include <mpi.h>
#include <mpi4py/mpi4py.h>

namespace py = pybind11;

/// Thin value wrapper around an MPI_Comm handle, giving pybind11 a distinct
/// C++ type to attach a custom type caster to.
///
/// Implicit conversion to and from MPI_Comm is intentional so the wrapper can
/// be used wherever a raw handle is expected.
struct mpi4py_comm {
  mpi4py_comm() = default;
  mpi4py_comm(MPI_Comm value) : value(value) {}
  // const-qualified so that const wrappers convert as well.
  operator MPI_Comm () const { return value; }

  // A default-constructed wrapper holds the null communicator rather than an
  // indeterminate handle.
  MPI_Comm value = MPI_COMM_NULL;
};


namespace pybind11 { namespace detail {
  template <> struct type_caster<mpi4py_comm> {
    public:
      PYBIND11_TYPE_CASTER(mpi4py_comm, _("mpi4py_comm"));

      // Python -> C++
      bool load(handle src, bool) {
        PyObject *py_src = src.ptr();

        // Check that we have been passed an mpi4py communicator
        if (PyObject_TypeCheck(py_src, &PyMPIComm_Type)) {
          // Convert to regular MPI communicator
          value.value = *PyMPIComm_Get(py_src);
        } else {
          return false;
        }

        return !PyErr_Occurred();
      }

      // C++ -> Python
      static handle cast(mpi4py_comm src,
                         return_value_policy /* policy */,
                         handle /* parent */)
      {
        // Create an mpi4py handle
        return PyMPIComm_New(src.value);
      }
  };
}} // namespace pybind11::detail


/// Receives a communicator and prints the caller's rank within it ten times
/// on a single line.
///
/// The rank is queried on the communicator that was passed in; previously the
/// argument was ignored and MPI_COMM_WORLD was always used.
void print_comm(mpi4py_comm comm)
{
        int rank = 0;
        MPI_Comm_rank(comm.value, &rank);

        // Ten copies of the rank, printed space-separated.
        const std::vector<int> test(10, rank);
        for (const int value : test) {
            std::cout << value << " ";
        }
        std::cout << std::endl;
}


class SomeComputation
{
    float multiplier;
    std::vector<int> test;
    MPI_Comm comm_;

public:
    void Init()
    {
        int rank;
        MPI_Comm_rank(comm_, &rank);
        test.clear();
        test.resize(10, rank);
    }
    SomeComputation(float multiplier_) : multiplier(multiplier_){}
    ~SomeComputation() { std::cout << "Destructor Called!\n"; }

    void set_comm(mpi4py_comm comm){
        this->comm_ = comm;
    }

    float compute(float input)
    {
        // std::this_thread::sleep_for(std::chrono::milliseconds((int)input * 10));
        for (int i = 0; i != 10; ++i)
        {
            std::cout << test[i] << " ";
        }
        std::cout << std::endl;
        return multiplier * input;
    }
};


/// Demonstration helper: returns MPI_COMM_WORLD wrapped in mpi4py_comm.
mpi4py_comm get_comm()
{
  const MPI_Comm world = MPI_COMM_WORLD;
  return mpi4py_comm(world);
}

// Python bindings for the extension module `native`.
PYBIND11_MODULE(native, m)
{
  // Load the mpi4py C API; abort module initialisation if that fails.
  const bool mpi4py_loaded = import_mpi4py() >= 0;
  if (!mpi4py_loaded) {
    throw std::runtime_error("Could not load mpi4py API.");
  }

  // Free functions used to test the communicator round trip.
  m.def("print_comm", &print_comm, "Do something with the mpi4py communicator.");
  m.def("get_comm", &get_comm, "Return some communicator.");

  // SomeComputation is exposed to Python as `Cpp_computation`.
  py::class_<SomeComputation> computation(m, "Cpp_computation");
  computation.def(py::init<float>()); // constructor argument types are template arguments
  computation.def("set_comm", &SomeComputation::set_comm);
  computation.def("compute", &SomeComputation::compute);
  computation.def("cpp_init", &SomeComputation::Init);
}

这段代码已经成功编译并运行,但是有没有更优雅的方式来实现它呢?


0

使用 void * 作为参数对我来说编译成功了。它与 pybind11 接口兼容 (无论如何,MPI_Comm 都是一个指针)。我所需要改变的只是这个:

void set_comm(void* comm){
  this->comm_ = (MPI_Comm)comm;
}

我还向setup.py添加了MPI库和包含文件,如下所示(根据您的MPI实现替换文件夹):

# One pybind11 extension built from src/main.cpp and linked with the `mpi`
# and `mpi_cxx` libraries. The include/library directories are specific to
# one MPI installation and must be adapted to the local setup.
_mpi_extension = Pybind11Extension(
    "module_name",
    ["src/main.cpp"],
    include_dirs=["/etc/alternatives/mpi-x86_64-linux-gnu"],
    library_dirs=["/usr/lib/x86_64-linux-gnu/openmpi/lib"],
    libraries=["mpi", "mpi_cxx"],
)
ext_modules = [_mpi_extension]

啊,我明白了。然而,在运行时我收到一个错误信息,指出MPI_Comm_rank未定义:ImportError: build/mpi_lib.cpython-310-x86_64-linux-gnu.so: undefined symbol: MPI_Comm_rank - Jarwin
你使用 mpicc 编译了吗?我使用了以下命令行:CC=mpicxx CXX=mpicxx python setup.py develop - Tal Ben-Nun
尽管使用mpicc进行编译,但在从Python调用时仍会崩溃并显示相同的错误。您可以上传您的CMake文件吗?我不确定setup.py. - Jarwin
我没有使用CMake,所有的构建指令都在setup.py中。我还通过直接添加库来成功移除了使用mpicxx编译的要求。请参见编辑后的答案。我使用python setup.py develop进行了编译。如果您需要更多组件,请告诉我。将mpi_cxx添加到库中可以解决上述ImportError问题。 - Tal Ben-Nun

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接