如何在Matlab中查找连通组件?

5

数组 A =

 2     3
 2     5
 4     8
 5     6
 7     8

我希望得到结果为“conidx = [2 3 5 6]和[4 7 8]”。
其中,[2 3] 的一个值存在于第二行,
[2 5] 的一个值存在于第四行,
因此,[2 3]、[2 5] 和 [5 6] 是相连的,
最后我可以得到连接的索引为 [2 3 5 6]。
否则,[4 8] 的一个值存在于第五行,
所以 [4 8] 和 [7 8] 是相连的,最终我可以获得连接的索引为 [4 7 8]。
[3] <--> [2] <--> [5] <--> [6] 和 [4] <--> [8] <--> [7]

1
嗨,Shai,你的帮助对我来说非常重要。谢谢。 - Minho
2个回答

12

构建一个图并使用graphconncomp

G = sparse( A(:,1), A(:,2), 1, max(A(:)), max(A(:)) );
G = G + G.'; %' make graph undirected
[S C] = graphconncomp( G ); % find connected components

如果您没有生物信息学工具箱

我在mex中实现的graphconncomp:


graph_conn_comp.m代码:


function [l c] = graph_conn_comp(sA)
% 
%  Computing connected components of an undirected graph - assuming sA is symmetric
% 
%  Usage:
%   [l c] = graph_conn_comp(sA);
% 
%  Inputs:
%   sA - sparse adjacency matrix (for directed graph - does not have to be symmetric)
% 
%  Outputs:
%   l - components labels
%   c - number of connected components
% 
% 
%  Compile using:
%  >> mex -O -largeArrayDims graph_conn_comp_mex.cpp
% 
% 

sA = spfun(@(x) ones(size(x)),sA);
if ~isequal(sA, sA')
    [ii jj] = find(sA);
    sA = sparse([ii jj],[jj ii], ones(1, 2*numel(ii)), size(sA,1), size(sA,2));
end
[l c] = graph_conn_comp_mex(sA);
l = double(l); % make it compatible of the rest of Matlab

使用Matlab编译graph_conn_comp_mex.cpp的代码。

>> mex -largeArrayDims -O graph_conn_comp_mex.cpp


#include "mex.h"
#include <string.h> // for memset
/*
 * Computing connected components of an undirected graph - assuming sA is symmetric
 *
 * Usage:
 *  [l c] = graph_conn_comp_mex(sA);
 *
 * Inputs:
 *  sA - sparse adjacency matrix (for directed graph - does not have to be symmetric)
 *
 * Outputs:
 *  l - components labels
 *  c - number of connected components
 *
 *
 * Compile using:
 * >> mex -O -largeArrayDims graph_conn_comp_mex.cpp
 *
 */
#line   __LINE__  "graph_conn_comp_mex"
#define     STR(s)      #s  
#define     ERR_CODE(a,b)   a ":" "line_" STR(b)
// inputs
enum {
    AIN = 0,
    NIN };
// outputs
enum {
    LOUT = 0,
    COUT,
    NOUT };
void
ConnComp(const mxArray* sA, unsigned int* pl, const double* pnc, mwIndex start_node);
mwIndex
FindUnLabeled(unsigned int* pl, mwIndex n);

void mexFunction( 
    int nout,
    mxArray* pout[],
    int nin,
    const mxArray* pin[])  {
    if ( nin != NIN )
         mexErrMsgIdAndTxt(ERR_CODE(__FILE__, __LINE__),"must have %d inputs", NIN);
    if (nout==0)
        return;
    if (nout != NOUT )
         mexErrMsgIdAndTxt(ERR_CODE(__FILE__, __LINE__),"must have exactly %d output", NOUT);

    if ( mxIsComplex(pin[AIN]) || !mxIsSparse(pin[AIN]) )
        mexErrMsgIdAndTxt(ERR_CODE(__FILE__, __LINE__),"sA must be real sparse matrix");
    if ( mxGetM(pin[AIN]) != mxGetN(pin[AIN]) )
        mexErrMsgIdAndTxt(ERR_CODE(__FILE__, __LINE__),"sA must be square matrix");

    mwIndex n = mxGetM(pin[AIN]); // number of variables
    mwIndex ii(0);
    // allocate outputs
    pout[LOUT] = mxCreateNumericMatrix(1, n, mxUINT32_CLASS, mxREAL);
    unsigned int* pl = (unsigned int*)mxGetData(pout[LOUT]);
    memset(pl, 0, n*sizeof(unsigned int)); // set initial labels to zero
    unsigned int cl = 0;
    // number of components
    pout[COUT] = mxCreateDoubleMatrix(1, 1, mxREAL);
    double* pnc = mxGetPr(pout[COUT]); 
    pnc[0] = 0; // number of components
    ii = 0;
    do {
        ConnComp(pin[AIN], pl, pnc, ii); // find conn comp
        pnc[0]++;        
        ii = FindUnLabeled(pl, n);
    } while ( ii < n );
}

mwIndex
FindUnLabeled(unsigned int* pl, mwIndex n) {
    for ( mwIndex ii(0); ii<n ; ii++ ){
        if ( pl[ii]==0 ) {
            return ii;
        }
    }
    return n+1;
}

void
ConnComp(const mxArray* sA, unsigned int* pl, const double* pnc, mwIndex start_node) {
    mwIndex n = mxGetM(sA);
    unsigned int curr_label = (unsigned int)(pnc[0]+1);
    mwIndex *stack = (mwIndex*)mxMalloc( (n)*sizeof(mwIndex) ); 
    memset(stack, 0, (n)*sizeof(mwIndex));
    mwIndex sp(0); // stack pointer
    // put start_node on top of stack after labeling it
    pl[start_node]=curr_label;
    stack[sp] = start_node;
    sp++;    
    mwIndex* pir = mxGetIr(sA);
    mwIndex* pjc = mxGetJc(sA);
    mwIndex  ii(0), neighbor(0);
    while ( sp > 0 ) {       
        // pop start_label from stack
        sp--;
        start_node = stack[sp];
        for ( ii = pjc[start_node] ; ii < pjc[start_node+1] ; ii++ ) {
            neighbor = pir[ii];
            if (pl[neighbor]==0) { // unlabeled
                pl[neighbor] = curr_label; // label it
                // push it into stack
                stack[sp] = neighbor;
                sp++;
            } else {
                if (pl[neighbor]!=curr_label)
                    mexErrMsgIdAndTxt(ERR_CODE(__FILE__, __LINE__),"Got mixed labeling %d <-> %d",pl[neighbor], curr_label);
            }
        }
    }
    mxFree(stack);
}

@EitanT确实。但是实现起来并不太难。我自己不久前就用mex实现了它。 - Shai
当然。此外,我建议您使用索引而不是 A 的实际值,以提高内存效率... - Eitan T
@EitanT,您能否详细说明一下这个内存效率问题? - Shai
@如果A的值为2000而不是2,你的邻接矩阵G会变得过于臃肿。 - Eitan T
@Eitan,是的,在预处理中将A进行去重很重要。我假设A已经被“压缩”了... - Shai
@EitanT 无论如何,现在您有了一个graph_conn_comp的mex实现,时间复杂度为O(我不需要自己实现它) ;-) - Shai

11

对于那些没有graphconncomp并且不想编译Shai的mex函数的人,我已经在博客上发布了graphconncomp的纯Matlab替代方案,作为gptoolbox的一部分。

function [S,C] = conncomp(G)
  [p,q,r] = dmperm(G'+speye(size(G)));
  S = numel(r)-1;
  C = cumsum(full(sparse(1,r(1:end-1),1,1,size(G,1))));
  C(p) = C;
end

对于大型稀疏输入,它比graphconncomp快约10倍。


网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接