我该在哪里找到一些用于解析CSV数据的JavaScript代码?
我该在哪里找到一些用于解析CSV数据的JavaScript代码?
您可以使用在此博客文章中提到的CSVToArray()函数。
console.log(CSVToArray(`"foo, the column",bar
2,3
"4, the value",5`));
// ref: https://dev59.com/kXM_5IYBdhLWcg3wp0wX#1293163
/**
 * Parses a delimited string into an array of rows, each row being an array
 * of string field values.
 *
 * Fields may be wrapped in double quotes, in which case they may contain the
 * delimiter and line breaks; a doubled quote ("") inside a quoted field
 * stands for one literal quote. Rows are separated by \r\n, \n or \r.
 *
 * @param {string} strData - The delimited text to parse.
 * @param {string} [strDelimiter=","] - Field delimiter (a single character).
 *   The default is the comma, but it can be overridden here.
 * @returns {string[][]} Parsed rows. Empty input yields one row holding one
 *   empty field; a trailing line break yields a final row holding one empty
 *   field.
 */
function CSVToArray( strData, strDelimiter ){
    // Check to see if the delimiter is defined. If not,
    // then default to comma.
    strDelimiter = (strDelimiter || ",");

    // Escape only regex metacharacters. Blindly prefixing a backslash would
    // turn delimiters such as "d" or "t" into the classes \d and \t.
    var strEscapedDelimiter = strDelimiter.replace(
        /[.*+?^${}()|[\]\\\/-]/g,
        "\\$&"
    );

    // Create a regular expression to parse the CSV values. It is built
    // without the "i" flag so that a letter delimiter is matched exactly.
    var objPattern = new RegExp(
        (
            // Delimiters.
            "(" + strEscapedDelimiter + "|\\r?\\n|\\r|^)" +
            // Quoted fields.
            "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
            // Standard fields. A double quote is allowed here so that a
            // stray quote inside an unquoted value does not cut it short.
            "([^" + strEscapedDelimiter + "\\r\\n]*))"
        ),
        "g"
    );

    // Create an array to hold our data. Give the array
    // a default empty first row.
    var arrData = [[]];

    // Holds the capture groups of the current match.
    var arrMatches = null;

    // Keep looping over the regular expression matches
    // until we can no longer find a match.
    while ((arrMatches = objPattern.exec( strData )) !== null){
        // Get the delimiter that was found.
        var strMatchedDelimiter = arrMatches[ 1 ];

        // exec() does not advance lastIndex after a zero-length match
        // (empty input), so step forward manually to avoid looping forever.
        if (arrMatches[ 0 ].length === 0){
            objPattern.lastIndex++;
        }

        // The text starts with a delimiter: the field in front of it is
        // empty and would otherwise be lost.
        if (arrMatches.index === 0 && strMatchedDelimiter.length){
            arrData[ 0 ].push( "" );
        }

        // Check to see if the given delimiter has a length
        // (is not the start of string) and if it matches the
        // field delimiter. If it does not, then we know
        // that this delimiter is a row delimiter.
        if (
            strMatchedDelimiter.length &&
            strMatchedDelimiter !== strDelimiter
        ){
            // Since we have reached a new row of data,
            // add an empty row to our data array.
            arrData.push( [] );
        }

        var strMatchedValue;

        // Check which kind of value we captured (quoted or unquoted).
        // Compare against undefined: an empty quoted field ("") captures
        // the empty string, which is falsy but still a quoted value.
        if (arrMatches[ 2 ] !== undefined){
            // Quoted value: unescape any doubled double quotes.
            strMatchedValue = arrMatches[ 2 ].replace( /""/g, "\"" );
        } else {
            // Non-quoted value.
            strMatchedValue = arrMatches[ 3 ];
        }

        // Add the value to the current (last) row.
        arrData[ arrData.length - 1 ].push( strMatchedValue );
    }

    // Return the parsed data.
    return( arrData );
}
"([^\"\\" 应该改为 "([^\\"。否则,在未加引号的值中,任何位置的双引号都会过早地结束它。我通过艰难的方式发现了这个问题... - Walter Tross
这是一个jQuery插件,旨在作为将CSV解析为JavaScript数据的端到端解决方案。它处理了RFC 4180中提出的每一个边缘情况,以及一些在Excel/Google电子表格导出时会出现(即主要涉及空值)但规范中缺失的情况。
示例:
track,artist,album,year
Dangerous,'Busta Rhymes','When Disaster Strikes',1997
// Calling this
music = $.csv.toArrays(csv)
// Outputs...
[
["track", "artist", "album", "year"],
["Dangerous", "Busta Rhymes", "When Disaster Strikes", "1997"]
]
console.log(music[1][2]) // Outputs: 'When Disaster Strikes'
更新:
哦,是的,我也应该提到它是完全可配置的。
music = $.csv.toArrays(csv, {
delimiter: "'", // Sets a custom value delimiter character
separator: ';', // Sets a custom field separator character
});
更新2:
现在它也可以在Node.js上使用jQuery,因此您可以使用同一库进行客户端或服务器端解析。
更新3:
自Google Code关闭以来,jquery-csv已迁移到GitHub。
免责声明:我也是jQuery-CSV的作者。
csv
是否指的是.csv文件名
?我对一个好的JS / JQuery工具来解析CSV文件很感兴趣。 - bouncingHippo这是一个非常简单的CSV解析器,可以处理带有逗号、换行符和转义双引号的引用字段。没有分割或正则表达式。它以每次扫描输入字符串1-2个字符,并构建一个数组。
在http://jsfiddle.net/vHKYH/上测试它。
/**
 * Parses CSV text one character at a time, without split() or regular
 * expressions.
 *
 * Double-quoted fields may contain commas and line breaks; a doubled quote
 * ("") inside a quoted field is one literal quote. Rows end at CRLF, LF or
 * CR. A comma always opens a new field, so a trailing empty field
 * ("a," -> ["a", ""]) is kept.
 *
 * @param {string} str - CSV text.
 * @returns {string[][]} Array of rows, each an array of field strings.
 *   Empty input yields an empty array.
 */
function parseCSV(str) {
    const rows = [];
    let inQuotes = false; // true while scanning inside a quoted field
    let row = 0;
    let col = 0;

    for (let i = 0; i < str.length; i++) {
        const ch = str[i];
        const next = str[i + 1];

        // Create the current row / field lazily so that a trailing line
        // break does not produce an extra empty row.
        rows[row] = rows[row] || [];
        rows[row][col] = rows[row][col] || '';

        if (ch === '"') {
            if (inQuotes && next === '"') {
                // Escaped quote inside a quoted field: emit one quote and
                // skip the second character of the pair.
                rows[row][col] += ch;
                i++;
            } else {
                // A lone quote opens or closes a quoted field.
                inQuotes = !inQuotes;
            }
            continue;
        }

        if (!inQuotes) {
            if (ch === ',') {
                // Start the next field right away; otherwise an empty field
                // at the end of a row or of the input would never be created.
                col++;
                rows[row][col] = '';
                continue;
            }
            if (ch === '\r' || ch === '\n') {
                // Treat CRLF as a single line break.
                if (ch === '\r' && next === '\n') {
                    i++;
                }
                row++;
                col = 0;
                continue;
            }
        }

        // Ordinary character (or a separator inside quotes).
        rows[row][col] += ch;
    }

    return rows;
}
if (cc == '\r' && nc == '\n' && !quote) { ++row; col = 0; ++c; continue; } if (cc == '\n' && !quote) { ++row; col = 0; continue; }
- user655063我在一份电子表格项目中有一个实现。
这段代码尚未经过彻底的测试,但欢迎任何人使用。
正如一些答案所指出的那样,如果您确实拥有DSV或TSV文件,那么您的实现可以更加简单,因为它们不允许在值中使用记录和字段分隔符。另一方面,CSV实际上可以在一个字段内包含逗号和换行符,这破坏了大多数基于正则表达式和分隔符的方法。
/**
 * Minimal CSV reader/writer.
 *
 * parse() understands double-quoted fields containing commas, line breaks
 * and doubled quotes; stringify() is its inverse and quotes only the cells
 * that need it.
 */
var CSV = {
    /**
     * Parses CSV text into a table (array of rows of cells).
     *
     * @param {string} csv - CSV text; rows end at \r, \n or \r\n.
     * @param {function(number, number, string): *} [reviver] - Called as
     *   reviver(rowIndex, columnIndex, rawValue); its return value is stored
     *   as the cell. Defaults to the identity on the value.
     * @returns {Array<Array<*>>} The parsed table. A blank line becomes an
     *   empty row.
     */
    parse: function(csv, reviver) {
        reviver = reviver || function(r, c, v) { return v; };
        var chars = csv.split(''), c = 0, cc = chars.length, start, end, table = [], row;
        while (c < cc) {
            table.push(row = []);
            while (c < cc && '\r' !== chars[c] && '\n' !== chars[c]) {
                start = end = c;
                if ('"' === chars[c]) {
                    // Quoted field: the value starts after the opening quote.
                    start = end = ++c;
                    while (c < cc) {
                        if ('"' === chars[c]) {
                            if ('"' !== chars[c + 1]) {
                                // Closing quote.
                                break;
                            }
                            else {
                                // Doubled quote: blank out the second one so
                                // the joined slice contains a single quote.
                                chars[++c] = ''; // unescape ""
                            }
                        }
                        end = ++c;
                    }
                    if ('"' === chars[c]) {
                        ++c;
                    }
                    // Skip anything between the closing quote and the next
                    // separator.
                    while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
                        ++c;
                    }
                } else {
                    while (c < cc && '\r' !== chars[c] && '\n' !== chars[c] && ',' !== chars[c]) {
                        end = ++c;
                    }
                }
                row.push(reviver(table.length - 1, row.length, chars.slice(start, end).join('')));
                if (',' === chars[c]) {
                    ++c;
                    // A comma directly followed by a line break or the end
                    // of input introduces one more (empty) field; the loop
                    // above would otherwise exit without recording it.
                    if (c >= cc || '\r' === chars[c] || '\n' === chars[c]) {
                        row.push(reviver(table.length - 1, row.length, ''));
                    }
                }
            }
            if ('\r' === chars[c]) {
                ++c;
            }
            if ('\n' === chars[c]) {
                ++c;
            }
        }
        return table;
    },

    /**
     * Serializes a table to CSV text with \r\n row separators.
     *
     * @param {Array<Array<*>>} table - Rows of cells.
     * @param {function(number, number, *): *} [replacer] - Called as
     *   replacer(rowIndex, columnIndex, cell); its return value is written.
     *   Defaults to the identity on the cell.
     * @returns {string} CSV text. Cells whose text contains a comma, CR, LF
     *   or double quote are quoted, with quotes doubled. Falsy cells other
     *   than 0 (null, undefined, false, NaN, '') are written as empty.
     */
    stringify: function(table, replacer) {
        replacer = replacer || function(r, c, v) { return v; };
        var csv = '', c, cc, r, rr = table.length, cell;
        for (r = 0; r < rr; ++r) {
            if (r) {
                csv += '\r\n';
            }
            for (c = 0, cc = table[r].length; c < cc; ++c) {
                if (c) {
                    csv += ',';
                }
                cell = replacer(r, c, table[r][c]);
                if (/[,\r\n"]/.test(cell)) {
                    // test() stringifies its argument, so convert explicitly
                    // before calling the string-only replace().
                    cell = '"' + String(cell).replace(/"/g, '""') + '"';
                }
                csv += (cell || 0 === cell) ? cell : '';
            }
        }
        return csv;
    }
};
console.log(CSV.parse("first,last,age\r\njohn,doe,"));
- skibulkconsole.log(CSV.parse("0,,2,3"));
- skibulk
在 if ('\r' === chars[c]) {...} 之前添加以下内容:if (end === c-1) { row.push(reviver(table.length-1, row.length, '')); }
- coderforlifecsvToArray v1.3
一款紧凑(645字节),但符合RFC4180标准的函数,将CSV字符串转换为2D数组。
https://code.google.com/archive/p/csv-to-array/downloads
常见用法:jQuery
$.ajax({
url: "test.csv",
dataType: 'text',
cache: false
}).done(function(csvAsString){
csvAsArray=csvAsString.csvToArray();
});
常用方式:JavaScript
csvAsArray = csvAsString.csvToArray();
覆盖字段分隔符
csvAsArray = csvAsString.csvToArray("|");
覆盖记录分隔符
csvAsArray = csvAsString.csvToArray("", "#");
覆盖跳过标题
csvAsArray = csvAsString.csvToArray("", "", 1);
覆盖所有
csvAsArray = csvAsString.csvToArray("|", "#", 1);
csvAsArray = csvAsString.csvToArray({fSep: "|"});
覆盖记录分隔符
csvAsArray = csvAsString.csvToArray({rSep: "#"});
覆盖跳过标题
csvAsArray = csvAsString.csvToArray({head: true});
全部覆盖
csvAsArray = csvAsString.csvToArray({fSep: "|", rSep: "#", head: true});
- Faisal Khan这是我的PEG(.js)语法规则,它似乎可以很好地处理RFC 4180(即它处理了http://en.wikipedia.org/wiki/Comma-separated_values上的示例):
start
= [\n\r]* first:line rest:([\n\r]+ data:line { return data; })* [\n\r]* { rest.unshift(first); return rest; }
line
= first:field rest:("," text:field { return text; })*
& { return !!first || rest.length; } // ignore blank lines
{ rest.unshift(first); return rest; }
field
= '"' text:char* '"' { return text.join(''); }
/ text:[^\n\r,]* { return text.join(''); }
char
= '"' '"' { return '"'; }
/ [^"]
你可以在http://jsfiddle.net/knvzk/10或http://pegjs.majda.cz/online上尝试。可以从https://gist.github.com/3362830下载生成的解析器。
这里是另一种解决方案。它使用了:
对于以下输入字符串:
"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,
[
"This is, a value",
"Hello",
4,
-123,
3.1415,
"This is also, possible",
true,
null
]
这里是我对parseCSVLine()的实现,包含可运行的代码片段:
/**
 * Parses a single CSV line into typed values.
 *
 * Each comma-separated field is converted as follows (first rule that
 * applies wins):
 *   - empty / whitespace only       -> null
 *   - "double quoted" text          -> the text between the quotes
 *   - 'single quoted' text          -> the text between the quotes
 *   - true / false                  -> boolean
 *   - optionally signed integer     -> number (base 10)
 *   - optionally signed decimal     -> number
 *   - anything else                 -> the trimmed text
 *
 * A trailing comma produces a final null, because it introduces one more
 * empty field; a line without a trailing comma produces no extra entry.
 *
 * @param {string} text - One CSV line.
 * @returns {Array<string|number|boolean|null>} The converted field values.
 */
function parseCSVLine(text) {
    // One field plus its terminator: group 2 is "," or "" (end of input).
    const fieldPattern = /\s*("[^"]*"|'[^']*'|[^,]*)\s*(,|$)/g;

    // Converts the raw matched text (field plus optional trailing comma).
    const toValue = (raw) => {
        let m;
        if (/^\s*,?$/.test(raw)) return null; // null value
        if ((m = raw.match(/^\s*"([^"]*)"\s*,?$/))) return m[1]; // Double quoted text
        if ((m = raw.match(/^\s*'([^']*)'\s*,?$/))) return m[1]; // Single quoted text
        if ((m = raw.match(/^\s*(true|false)\s*,?$/))) return m[1] === "true"; // Boolean
        if ((m = raw.match(/^\s*([+-]?\d+)\s*,?$/))) return Number.parseInt(m[1], 10); // Integer
        // Require at least one digit so that a lone "." stays text rather
        // than becoming NaN.
        if ((m = raw.match(/^\s*([+-]?(?:\d+\.\d*|\.\d+))\s*,?$/))) return Number.parseFloat(m[1]); // Float
        if ((m = raw.match(/^\s*(.*?)\s*,?$/))) return m[1]; // Unquoted text
        return raw;
    };

    const values = [];
    let match;
    while ((match = fieldPattern.exec(text)) !== null) {
        values.push(toValue(match[0]));
        // Stop at the field that ends the input. A global match() would go
        // on to report one more zero-length match at the end of the string,
        // which showed up as a spurious trailing null.
        if (match[2] !== ",") {
            break;
        }
    }
    return values;
}
let data = `"This is\, a value",Hello,4,-123,3.1415,'This is also\, possible',true,`;
let obj = parseCSVLine(data);
console.log( JSON.stringify( obj, undefined, 2 ) );
text.match( /\s*(\".*?\"|'.*?'|[^,]+|)\s*(,|$)/g )
。我不得不在第一个捕获组中添加最后一个 |
,以允许 CSV 中的空单元格。 - cstrattext.match(/\s*(".*?"|'.*?'|[^,]+|(?!$))\s*(,|$)/g)
这又创建了另一个问题,即我不能有一个空的最后一格。我可能会回到原始修复方法,并在最后一列中过滤掉额外的空单元格。 - cstrat[^,]+
改成了 [^,]*
,这样它现在可以匹配空单元格并将其返回为 null。我已经更新了示例以反映这一点。@BoruchHashem 我用 (\"[^"]*\")
替换了 (\".*?\")
,这样它现在可以匹配多行双引号字符串。我对单引号字符串也做了类似的更改。 - Stephen Quan这是我的简单的纯JavaScript代码:
let a = 'one,two,"three, but with a comma",four,"five, with ""quotes"" in it.."'
console.log(splitQuotes(a))
/**
 * Splits one CSV line into cells, honouring double-quoted cells.
 *
 * Inside a quoted cell a comma is ordinary text and a doubled quote ("")
 * is one literal quote. Lines without any quote take the fast path of a
 * plain split(','); both paths keep empty cells, including a trailing one.
 *
 * @param {string} line - A single CSV line (no embedded line breaks).
 * @returns {string[]} The cell values with enclosing quotes removed.
 */
function splitQuotes(line) {
    if (line.indexOf('"') < 0) {
        return line.split(',');
    }

    const result = [];
    let cell = '';
    let quote = false; // true while inside a quoted cell

    for (let i = 0; i < line.length; i++) {
        const char = line[i];
        if (char === '"') {
            if (quote && line[i + 1] === '"') {
                // Escaped quote. Only valid inside a quoted cell: outside
                // one, "" is an empty quoted cell, not a literal quote.
                cell += char;
                i++;
            } else {
                quote = !quote;
            }
        } else if (!quote && char === ',') {
            result.push(cell);
            cell = '';
        } else {
            cell += char;
        }
    }

    // Always emit the last cell, even when empty, to match split(',').
    result.push(cell);
    return result;
}
我不确定为什么我无法使用 Kirtan的示例。似乎在空字段或可能带有尾随逗号的字段上失败了...
这个代码似乎可以处理两者。
我没有编写解析器代码,只是对解析器函数进行了包装,以使其适用于文件。查看属性。
var Strings = {
/**
* Wrapped CSV line parser: splits the text into lines, then splits every
* line into fields, re-joining pieces that were split inside a quoted field.
*
* The text is cut into lines before any quote handling, so a line break
* inside a quoted field still ends the line.
*
* @param s String delimited CSV string
* @param sep Separator override; "," is used when this is falsy
* @returns The array of lines, each entry replaced by its array of fields
* @attribution: http://www.greywyvern.com/?post=258 (comments closed on blog :( )
*/
parseCSV : function(s,sep) {
// https://dev59.com/43M_5IYBdhLWcg3w9oIA
var universalNewline = /\r\n|\r|\n/g;
var a = s.split(universalNewline);
// NOTE(review): for...in on an array yields string keys and would also visit
// enumerable properties added to Array.prototype - consider an index loop.
for(var i in a){
// Split naively on the separator, then walk the pieces from right to left
// (x counts down) so that pieces can be merged into their left neighbour.
for (var f = a[i].split(sep = sep || ","), x = f.length - 1, tl; x >= 0; x--) {
// Does this piece end with a double quote? Whitespace after the quote is
// removed before the test.
// NOTE(review): the index still comes from the untrimmed f[x].length, so a
// piece with whitespace after its closing quote looks like it fails this
// test - confirm whether that is intended.
if (f[x].replace(/"\s+$/, '"').charAt(f[x].length - 1) == '"') {
// It also starts with a quote (ignoring leading whitespace) and is longer
// than one character: a complete quoted field. Strip the enclosing quotes
// and collapse doubled quotes.
if ((tl = f[x].replace(/^\s+"/, '"')).length > 1 && tl.charAt(0) == '"') {
f[x] = f[x].replace(/^\s*"|"\s*$/g, '').replace(/""/g, '"');
// Otherwise the opening quote is in an earlier piece: glue this piece back
// onto its left neighbour with the separator. The merged piece sits at
// index x - 1, which the next iteration examines.
} else if (x) {
f.splice(x - 1, 2, [f[x - 1], f[x]].join(sep));
// First piece with no left neighbour: remove it, split it on the separator
// again and put the result in front of the remaining pieces.
} else f = f.shift().split(sep).concat(f);
// NOTE(review): the result of replace() is discarded here, so this
// statement leaves f[x] unchanged.
} else f[x].replace(/""/g, '"');
// Replace the raw line with its array of fields.
} a[i] = f;
}
return a;
}
}
正则表达式来拯救!这几行代码根据RFC 4180标准处理带有嵌入逗号、引号和换行符的正确引用字段。
/**
 * Parses CSV text into a grid (array of rows of cell strings).
 *
 * Works in two passes: first every quoted field is unwrapped and the
 * characters that would confuse a plain split (line breaks, escaped quotes,
 * field separators) are swapped for control-character placeholders; then
 * the text is split on line breaks and separators and the placeholders are
 * swapped back.
 *
 * @param {string} data - CSV text. All \r characters and trailing line
 *   breaks are removed before parsing.
 * @param {string} [fieldSep=','] - Field separator.
 * @param {string} [newLine='\n'] - Text written in place of a line break
 *   that occurs inside a quoted field.
 * @returns {string[][]} The parsed grid.
 */
function parseCsv(data, fieldSep, newLine) {
    fieldSep = fieldSep || ',';
    newLine = newLine || '\n';

    // Placeholders, assumed not to occur in the data itself.
    var nSep = '\x1D'; // line break inside a quoted field
    var qSep = '\x1E'; // escaped quote ("")
    var cSep = '\x1F'; // field separator inside a quoted field

    // The separator is embedded in a character class, so escape the
    // characters that are special there.
    var sepInClass = fieldSep.replace(/[\\\]\[^-]/g, '\\$&');

    // A quoted field: preceded by start/separator/line break, followed by
    // end/separator/line break, and not ending in the middle of a "" pair.
    var fieldRe = new RegExp('(?<=(^|[' + sepInClass + '\\n]))"(|[\\s\\S]+?(?<![^"]"))"(?=($|[' + sepInClass + '\\n]))', 'g');

    return data
        .replace(/\r/g, '')
        .replace(/\n+$/, '')
        .replace(fieldRe, function(match, p1, p2) {
            // Protect the actual separator (not a hard-coded comma), so that
            // custom separators inside quoted fields survive the split below.
            return p2.replace(/\n/g, nSep).replace(/""/g, qSep).split(fieldSep).join(cSep);
        })
        .split(/\n/)
        .map(function(line) {
            return line.split(fieldSep).map(function(cell) {
                // split/join instead of replace() so that "$" sequences in
                // newLine or fieldSep are inserted literally.
                return cell
                    .split(nSep).join(newLine)
                    .split(qSep).join('"')
                    .split(cSep).join(fieldSep);
            });
        });
}
const csv = 'A1,B1,C1\n"A ""2""","B, 2","C\n2"';
const separator = ','; // field separator, default: ','
const newline = ' <br /> '; // newline representation in case a field contains newlines, default: '\n'
var grid = parseCsv(csv, separator, newline);
// expected: [ [ 'A1', 'B1', 'C1' ], [ 'A "2"', 'B, 2', 'C <br /> 2' ] ]