我刚刚为这个问题开发了一个解决方案,它也适用于这里,所以我在这里提供它:
tl <- function(e) { if (is.null(e)) return(NULL); ret <- typeof(e); if (ret == 'list' && !is.null(names(e))) ret <- list(type='namedlist') else ret <- list(type=ret,len=length(e)); ret; };
mkcsv <- function(v) paste0(collapse=',',v);
keyListToStr <- function(keyList) paste0(collapse='','/',sapply(keyList,function(key) if (is.null(key)) '*' else paste0(collapse=',',key)));
extractLevelColumns <- function(
nodes,
...,
keyList=list(),
sep=NULL,
mkname=function(keyList,maxLen) paste0(collapse='.',if (is.null(sep) && maxLen == 1L) keyList[-length(keyList)] else keyList)
) {
cat(sprintf('extractLevelColumns(): %s\n',keyListToStr(keyList)));
if (length(nodes) == 0L) return(list());
tlList <- lapply(nodes,tl);
typeList <- do.call(c,lapply(tlList,`[[`,'type'));
if (length(unique(typeList)) != 1L) stop(sprintf('error: inconsistent types (%s) at %s.',mkcsv(typeList),keyListToStr(keyList)));
type <- typeList[1L];
if (type == 'namedlist') {
allKeys <- unique(do.call(c,lapply(nodes,names)));
ret <- do.call(c,lapply(allKeys,function(key) extractLevelColumns(lapply(nodes,`[[`,key),...,keyList=c(keyList,key),sep=sep,mkname=mkname)));
} else if (type == 'list') {
lenList <- do.call(c,lapply(tlList,`[[`,'len'));
maxLen <- max(lenList,na.rm=T);
allIndexes <- seq_len(maxLen);
ret <- do.call(c,lapply(allIndexes,function(index) extractLevelColumns(lapply(nodes,function(node) if (length(node) < index) NULL else node[[index]]),...,keyList=c(keyList,index),sep=sep,mkname=mkname)));
} else if (type%in%c('raw','logical','integer','double','complex','character')) {
lenList <- do.call(c,lapply(tlList,`[[`,'len'));
maxLen <- max(lenList,na.rm=T);
if (is.null(sep)) {
ret <- lapply(seq_len(maxLen),function(i) setNames(data.frame(sapply(nodes,function(node) if (length(node) < i) NA else node[[i]]),...),mkname(c(keyList,i),maxLen)));
} else {
ret <- list(setNames(data.frame(sapply(nodes,function(node) if (length(node) == 0L) NA else if (maxLen == 1L) node else paste(collapse=sep,node)),...),mkname(keyList,maxLen)));
};
} else stop(sprintf('error: unsupported type %s at %s.',type,keyListToStr(keyList)));
if (is.null(ret)) ret <- list();
ret;
};
flattenList <- function(mainList,...) do.call(cbind,extractLevelColumns(mainList,...));
执行:
mylist <- list(structure(list(Hit='True',Project='Blue',Year='2011',Rating='4',Launch='26 Jan 2012',ID='19',Dept='1, 2, 4'),.Names=c('Hit','Project','Year','Rating','Launch','ID','Dept')),structure(list(Hit='False',Error='Record not found'),.Names=c('Hit','Error')),structure(list(Hit='True',Project='Green',Year='2004',Rating='8',Launch='29 Feb 2004',ID='183',Dept='6, 8'),.Names=c('Hit','Project','Year','Rating','Launch','ID','Dept')));
df <- flattenList(mylist);
df;
我的函数比1.9.6版本的data.table::rbindlist()
更加强大,因为它可以处理任意层级的嵌套和不同长度的向量。在相关问题中,我的函数能够正确地将OP的列表展平为数据框,但是data.table::rbindlist()
会出现错误:"Error in rbindlist(jsonRList, fill = T) : Column 4 of item 16 is length 2, inconsistent with first column of that item which is length 1. rbind/rbindlist doesn't recycle as it already expects each item to be a uniform list, data.frame or data.table"
。
rbind/rbindlist
的问题,因为它已经期望每个项目是均匀的列表、数据框或数据表,所以无法循环利用。 - msoderstrom