高效地将System.Single[,]转换为NumPy数组。

6

使用Python 3.6和Python for dotNET/pythonnet,我已经获得了一个图像数组,该数组的类型为System.Single[,]。

我想将其转换为numpy数组,以便在Python中实际处理它。我设置了一个函数来逐个元素地遍历该数组并进行转换 - 但是是否有更明智(且更快)的方法可以使用?

def MeasurementArrayToNumpy(TwoDArray):
    """Copy a two-dimensional .NET array into a new NumPy array element by element.

    Args:
        TwoDArray: A rank-2 array-like object exposing ``GetLength(dim)`` and
            ``[row, col]`` indexing (e.g. a ``System.Single[,]``).
            Assumes zero-based lower bounds -- TODO confirm for arrays created
            with custom lower bounds.

    Returns:
        A ``numpy.ndarray`` with NumPy's default float dtype and the same
        shape as ``TwoDArray``.
    """
    # GetUpperBound() returns the highest valid *index*, not the number of
    # elements, so sizing the result from it drops the last row and column.
    # GetLength() gives the real extent of each dimension.
    rows = TwoDArray.GetLength(0)
    cols = TwoDArray.GetLength(1)

    resultArray = np.zeros([rows, cols])

    for r in range(rows):
        for c in range(cols):
            resultArray[r, c] = TwoDArray[r, c]
    return resultArray

你试过numpy.array(TwoDArray)吗? - Nils Werner
是的,我试过了。它返回:array(<System.Single[,] object at 0x0000000011501438>, dtype=object) - rbp109
那么显然它没有以NumPy理解的方式实现迭代。我猜你正在做的已经是它能做到的最好了。 - Nils Werner
你可以开始阅读这个邮件列表线程:https://mail.python.org/pipermail/pythondotnet/2014-May/001526.html - denfromufa
1
你可以在这里找到一些非常高效的方法:https://github.com/pythonnet/pythonnet/issues/514 - denfromufa
如何将System添加到Python环境中?使用pip install System会出现以下错误:ERROR: Could not find a version that satisfies the requirement System,ERROR: No matching distribution found for System。 - Steve Boege
3个回答

7

@denfromufa - 这是一个非常有用的链接。

建议直接使用内存拷贝,可以使用Marshal.Copy或np.frombuffer。我无法让Marshal.Copy版本正常工作 - 必须使用一些花招才能与Marshal一起使用2D数组,并且这会以某种方式改变数组的内容 - 但是对我来说,np.frombuffer版本似乎可以工作,并将完成时间缩短了大约16000倍,对于一个3296 * 2471数组(从25秒到1.50毫秒)。这对我的目的已足够。

该方法需要导入两个附加库,因此我已在下面的代码段中包含它们。

import ctypes
from System.Runtime.InteropServices import GCHandle, GCHandleType

def SingleToNumpyFromBuffer(TwoDArray):
    """Copy the raw contents of a .NET ``System.Single`` array into NumPy.

    The managed array is pinned so the garbage collector cannot move it,
    its memory is read through a ctypes buffer, and the data is copied into
    a NumPy-owned array before the pin is released.

    Args:
        TwoDArray: The .NET array to read; ``len(TwoDArray)`` elements are
            interpreted as C ``float`` values.

    Returns:
        A flat (1-D) ``numpy.ndarray`` of ``len(TwoDArray)`` float32 values
        that owns its own memory. Reshape it at the call site if the
        original dimensions are needed.
    """
    src_hndl = GCHandle.Alloc(TwoDArray, GCHandleType.Pinned)

    try:
        # ToInt64, not ToInt32: in a 64-bit process the address may not fit
        # in 32 bits and IntPtr.ToInt32() raises OverflowException.
        src_ptr = src_hndl.AddrOfPinnedObject().ToInt64()
        bufType = ctypes.c_float * len(TwoDArray)
        cbuf = bufType.from_address(src_ptr)
        # Copy while the array is still pinned. Returning the zero-copy view
        # would leave NumPy pointing at memory the CLR is free to move or
        # collect as soon as the handle is released below.
        resultArray = np.frombuffer(cbuf, dtype=cbuf._type_).copy()
    finally:
        if src_hndl.IsAllocated:
            src_hndl.Free()
    return resultArray

很高兴这对你有用!随时将其作为维基贡献给项目。 - denfromufa
我正在寻找一种检查C#数组类型的方法,但是没有找到安全实现的方法。有什么提示吗? - Georg W.
我发现使用 TwoDArray.GetType().ToString() 对于动态类型检查非常有用。 - Georg W.

1

根据 denfromufa 提供的链接,我认为 Robert McLeod 给出了最好的解决方案。他还指出了使用 np.frombuffer 的缺点:

虽然可以使用np.frombuffer进行零拷贝,但这样会导致Python垃圾回收器和C#垃圾回收器都管理内存,使得内存混乱。

以下是Robert McLeod在Github问题中的代码片段:

import numpy as np
import ctypes
import clr, System
from System import Array, Int32
from System.Runtime.InteropServices import GCHandle, GCHandleType

# NumPy dtype -> CLR element type, used when allocating a .NET array that
# will receive the contents of a NumPy array.
_MAP_NP_NET = {
    np.dtype(np.float32): System.Single,
    np.dtype(np.float64): System.Double,
    np.dtype(np.int8): System.SByte,
    np.dtype(np.int16): System.Int16,
    np.dtype(np.int32): System.Int32,
    np.dtype(np.int64): System.Int64,
    np.dtype(np.uint8): System.Byte,
    np.dtype(np.uint16): System.UInt16,
    np.dtype(np.uint32): System.UInt32,
    np.dtype(np.uint64): System.UInt64,
    np.dtype(np.bool_): System.Boolean,
}
# CLR element type name (as reported by Type.Name) -> NumPy dtype, used when
# allocating a NumPy array that will receive the contents of a .NET array.
_MAP_NET_NP = {
    'Single': np.dtype(np.float32),
    'Double': np.dtype(np.float64),
    'SByte': np.dtype(np.int8),
    'Int16': np.dtype(np.int16),
    'Int32': np.dtype(np.int32),
    'Int64': np.dtype(np.int64),
    'Byte': np.dtype(np.uint8),
    'UInt16': np.dtype(np.uint16),
    'UInt32': np.dtype(np.uint32),
    'UInt64': np.dtype(np.uint64),
    'Boolean': np.dtype(np.bool_),
}

def asNumpyArray(netArray):
    '''
    Given a CLR `System.Array` returns a `numpy.ndarray`.  See _MAP_NET_NP for
    the mapping of CLR types to Numpy dtypes.

    The data is copied with a single `memmove` from the pinned CLR array into
    a freshly allocated, C-ordered NumPy array, so the result owns its memory.

    Raises `NotImplementedError` when the CLR element type has no entry in
    _MAP_NET_NP.
    '''
    dims = tuple(int(netArray.GetLength(axis)) for axis in range(netArray.Rank))
    netType = netArray.GetType().GetElementType().Name

    # Keep the try body down to the lookup so an unrelated KeyError cannot be
    # misreported as an unsupported type.
    try:
        dtype = _MAP_NET_NP[netType]
    except KeyError:
        raise NotImplementedError("asNumpyArray does not yet support System type {}".format(netType)) from None

    npArray = np.empty(dims, order='C', dtype=dtype)

    # Pin outside the try block: if Alloc itself fails there is no handle to
    # release, and referencing it in `finally` would raise UnboundLocalError
    # and hide the real error.
    sourceHandle = GCHandle.Alloc(netArray, GCHandleType.Pinned)
    try:  # Memmove
        sourcePtr = sourceHandle.AddrOfPinnedObject().ToInt64()
        destPtr = npArray.__array_interface__['data'][0]
        ctypes.memmove(destPtr, sourcePtr, npArray.nbytes)
    finally:
        if sourceHandle.IsAllocated:
            sourceHandle.Free()
    return npArray

def asNetArray(npArray):
    '''
    Given a `numpy.ndarray` returns a CLR `System.Array`.  See _MAP_NP_NET for
    the mapping of Numpy dtypes to CLR types.

    Note: `complex64` and `complex128` arrays are converted to `float32`
    and `float64` arrays respectively with shape [m,n,...] -> [m,n,...,2]

    The data is copied with a single `memmove` into a newly created CLR array.
    Raises `NotImplementedError` when the dtype has no entry in _MAP_NP_NET.
    '''
    # `shape` is an immutable tuple; take a list so the complex branches can
    # append the trailing (real, imag) axis.
    dims = list(npArray.shape)
    dtype = npArray.dtype

    # memmove below copies raw bytes, and the complex -> float view needs a
    # contiguous last axis, so make the data C-contiguous first.
    if not npArray.flags.c_contiguous:
        npArray = npArray.copy(order='C')

    # For complex arrays, we must make a view of the array as its corresponding
    # float type.
    if dtype == np.complex64:
        dtype = np.dtype('float32')
        dims.append(2)
        npArray = npArray.view(np.float32).reshape(dims)
    elif dtype == np.complex128:
        dtype = np.dtype('float64')
        dims.append(2)
        npArray = npArray.view(np.float64).reshape(dims)

    try:
        netType = _MAP_NP_NET[dtype]
    except KeyError:
        raise NotImplementedError("asNetArray does not yet support dtype {}".format(dtype)) from None

    netDims = Array.CreateInstance(Int32, npArray.ndim)
    for axis in range(npArray.ndim):
        netDims[axis] = Int32(dims[axis])

    netArray = Array.CreateInstance(netType, netDims)

    # Pin outside the try block: if Alloc itself fails there is no handle to
    # release, and referencing it in `finally` would raise UnboundLocalError
    # and hide the real error.
    destHandle = GCHandle.Alloc(netArray, GCHandleType.Pinned)
    try:  # Memmove
        sourcePtr = npArray.__array_interface__['data'][0]
        destPtr = destHandle.AddrOfPinnedObject().ToInt64()
        ctypes.memmove(destPtr, sourcePtr, npArray.nbytes)
    finally:
        if destHandle.IsAllocated:
            destHandle.Free()
    return netArray

if __name__ == '__main__':
    # Round-trip benchmark: times NumPy -> .NET and .NET -> NumPy conversion
    # of a 1024x1024 float32 array and plots system free memory per iteration.
    import psutil
    import matplotlib.pyplot as plt
    from time import perf_counter

    MIB = 2.0**20
    tries = 1000
    foo = np.full([1024, 1024], 2.5, dtype='float32')

    # --- NumPy -> .NET ---
    t_asNet = np.zeros(tries)
    netMem = np.zeros(tries)
    # One untimed call first: lazy loading makes the first conversion very slow.
    netFoo = asNetArray(foo)
    for run in range(tries):
        started = perf_counter()
        netFoo = asNetArray(foo)
        t_asNet[run] = perf_counter() - started
        netMem[run] = psutil.virtual_memory().free / MIB

    # --- .NET -> NumPy ---
    t_asNumpy = np.zeros(tries)
    numpyMem = np.zeros(tries)
    # One untimed call first: lazy loading makes the first conversion very slow.
    unNetFoo = asNumpyArray(netFoo)
    for run in range(tries):
        started = perf_counter()
        unNetFoo = asNumpyArray(netFoo)
        t_asNumpy[run] = perf_counter() - started
        numpyMem[run] = psutil.virtual_memory().free / MIB

    # Seconds -> milliseconds
    t_asNet *= 1000
    t_asNumpy *= 1000

    # The round trip must reproduce the input.
    np.testing.assert_array_almost_equal(unNetFoo, foo)

    # mean is in ms; ms per element * 1e6 gives ns per element.
    print("Numpy to .NET converted {} bytes in {:.3f} +/- {:.3f} ms (mean: {:.1f} ns/ele)".format(
        foo.nbytes, t_asNet.mean(), t_asNet.std(), t_asNet.mean() / foo.size * 1e6))
    print(".NET to Numpy converted {} bytes in {:.3f} +/- {:.3f} ms (mean: {:.1f} ns/ele)".format(
        foo.nbytes, t_asNumpy.mean(), t_asNumpy.std(), t_asNumpy.mean() / foo.size * 1e6))

    iterations = np.arange(tries)
    plt.figure()
    plt.plot(iterations, netMem, '-', label='asNetArray')
    plt.plot(iterations, numpyMem, '-', label='asNumpyArray')
    plt.legend(loc='best')
    plt.ylabel('Free memory (MB)')
    plt.xlabel('Iteration')
    plt.show(block=True)

值得一提的是,pythonnet 有一个看起来很有前途的新实验性功能:编解码器(Codecs)。不过,只有当你从源代码构建 pythonnet 并且能够读懂其文档时,它才用得上。

0

我修改了rbp109函数,使其可以与类型为System.Int32 [,,]的RGB图像一起使用。然后重新调整numpy数组的形状,以便在opencv窗口中显示图像。

def net2Numpy(net_img,width,height):
    """Convert a .NET ``System.Int32`` RGB image array to a uint8 NumPy image.

    The managed array is pinned, its raw memory is read as 32-bit integers,
    and the values are converted to ``uint8`` while the pin is still held.

    Args:
        net_img: The .NET array holding the image; ``len(net_img)`` 32-bit
            integers are read from it.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        A ``numpy.ndarray`` of dtype ``uint8`` with shape
        ``(height, width, 3)`` in C order, owning its own memory.
    """
    src_hndl = GCHandle.Alloc(net_img, GCHandleType.Pinned)
    try:
        # ToInt64, not ToInt32: in a 64-bit process the address may not fit
        # in 32 bits and IntPtr.ToInt32() raises OverflowException.
        src_ptr = src_hndl.AddrOfPinnedObject().ToInt64()
        # c_int32 rather than c_int so the element width always matches
        # System.Int32, regardless of the platform's C `int` size.
        bufType = ctypes.c_int32 * len(net_img)
        cbuf = bufType.from_address(src_ptr)
        # astype() copies; do it before the handle is freed so the CLR
        # memory is only ever read while it is pinned.
        resultArray = np.frombuffer(cbuf, dtype=np.int32).astype(np.uint8)
    finally:
        if src_hndl.IsAllocated:
            src_hndl.Free()

    return resultArray.reshape((height, width, 3), order='C')

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接