var str = 'single words "fixed string of words"';
var astr = str.split(" "); // need fix: splits on every space, so the quoted phrase is broken into '"fixed', 'string', 'of', 'words"'
我希望数组变成这样:
var astr = ["single", "words", "fixed string of words"];
var str = 'single words "fixed string of words"';
var astr = str.split(" "); // need fix: splits on every space, so the quoted phrase is broken into '"fixed', 'string', 'of', 'words"'
我希望数组变成这样:
var astr = ["single", "words", "fixed string of words"];
接受的答案并不完全正确。它会在非空格字符(如.和-)处分隔,并将引号留在结果中。更好的方法是使用捕获组来排除引号,例如:
// Tokenizer pattern with two alternatives:
//   [^\s"]+    a run of characters that are neither whitespace nor double quotes
//   "([^"]*)"  a double-quoted phrase; the parentheses capture the text between the quotes
var myRegexp = /[^\s"]+|"([^"]*)"/gi;
var myString = 'single words "fixed string of words"';
var myArray = [];
var match;
// Each call to exec returns the next match as an array, or null once the input is
// exhausted (the g flag makes exec resume where the previous match ended).
while ((match = myRegexp.exec(myString)) !== null) {
    // Index 1 is the captured group; it is undefined when the unquoted alternative matched.
    // Index 0 is the whole matched text, which is used for unquoted words.
    // Compare against undefined instead of testing truthiness so that an empty quoted
    // string ("") yields an empty entry rather than the raw two-quote text.
    myArray.push(match[1] !== undefined ? match[1] : match[0]);
}
现在,myArray将恰好包含OP所要求的内容:
single,words,fixed string of words
// \w+ matches a run of letters, digits or underscores; "[^"]+" matches a whole
// double-quoted phrase. The quotes are part of the match, so they stay in the result.
str.match(/\w+|"[^"]+"/g)
//single, words, "fixed string of words"
str.match(/\S+|"[^"]+"/g)
。 - Awalias
对于 'single words "fixed string of \"quoted\" words"' 这样的输入,即使使用了Awalias的更正,这仍然会产生:["single", "words", ""fixed", "string", ""of", "words""]
您需要处理转义引号,同时又不能被转义的反斜杠绊倒。我认为最终会比您希望用正则表达式处理的情况复杂得多。 - jep
这里使用了split和正则表达式匹配的混合技巧。
var str = 'single words "fixed string of words"';
// Collect every double-quoted phrase (match returns null when there is none).
var matches = str.match(/"[^"]*"/g);
// Cut the quoted phrases out of the string (leaving a space so neighbours do not merge),
// then trim leading and trailing whitespace.
str = str.replace(/"[^"]*"/g, " ").replace(/^\s+|\s+$/g, "");
// Split the remainder on runs of whitespace; an empty remainder means no unquoted words,
// which must give [] rather than [""].
var astr = str === "" ? [] : str.split(/\s+/);
if (matches) {
    // Quoted phrases are appended after the unquoted words, with their quotes stripped.
    for (var i = 0; i < matches.length; i++) {
        astr.push(matches[i].replace(/"/g, ""));
    }
}
这个代码会返回预期结果,虽然使用单个正则表达式应该能够完成全部操作。
// ["single", "words", "fixed string of words"]
更新,这是S.Mark提出的方法的改进版本。
var str = 'single words "fixed string of words"';
// match() returns null when nothing matches (for example on an empty string), so fall
// back to an empty array instead of reading .length from null.
// Note: \w+ only covers letters, digits and underscores; other characters outside
// quotes are not captured by this pattern.
var aStr = str.match(/\w+|"[^"]+"/g) || [];
var i = aStr.length;
// Walk the tokens from last to first and strip the quotes kept around quoted phrases.
while (i--) {
    aStr[i] = aStr[i].replace(/"/g, "");
}
// ["single", "words", "fixed string of words"]
ES6支持的解决方案:
代码:
// Break the string into one-character tokens, keeping a backslash attached to the
// character that follows it; the `^$` alternative gives a single empty token for an
// empty string so match() does not return null there.
// The accumulator holds the output list `a` and a `quote` flag that is toggled by
// every bare double quote.
str.match(/\\?.|^$/g).reduce((state, token) => {
    if (token === '"') {
        // A bare quote opens or closes a quoted section and is not copied to the output.
        state.quote ^= 1;
        return state;
    }
    if (!state.quote && token === ' ') {
        // A space outside quotes starts a new entry.
        state.a.push('');
        return state;
    }
    // Anything else extends the current entry; an escaped pair contributes only the
    // escaped character.
    state.a[state.a.length - 1] += token.replace(/\\(.)/, "$1");
    return state;
}, {a: ['']}).a
输出:
[ 'single', 'words', 'fixed string of words' ]
/**
 * Splits a string into words, keeping each double-quoted phrase together as a single
 * entry with its surrounding quotes removed.
 *
 * @param {string} [words=''] - Text to split.
 * @returns {string[]} Words and unquoted phrases in order of appearance; an empty
 *   array when the text contains no tokens.
 */
const parseWords = (words = '') =>
  // match() returns null when there are no tokens, hence the `|| []` fallback.
  (words.match(/[^\s"]+|"([^"]*)"/gi) || []).map((word) =>
    // Each token is either free of quotes or one complete "..." phrase, so anchoring on
    // [^"]* strips the quotes even when the phrase is empty or contains a newline.
    word.replace(/^"([^"]*)"$/, '$1'));
这个解决方案适用于双引号 (") 和单引号 ('):
代码:
// Three alternatives: an unquoted run, a "double-quoted" phrase, or a 'single-quoted'
// phrase. The single-quote alternative is needed for single-quoted phrases to stay
// together as one entry.
// With the g flag, match() returns whole matches and ignores capture groups, so the
// surrounding quotes remain on quoted phrases.
str.match(/[^\s"']+|"([^"]*)"|'([^']*)'/gmi)
// ["single", "words", "\"fixed string of words\""]
这里展示了这个正则表达式的工作方式:https://regex101.com/r/qa3KxQ/2
在我找到@dallin的答案之前(此线程:https://dev59.com/yXE85IYBdhLWcg3wXCIv#18647776),我在JavaScript中处理包含未引用和引用术语/短语的字符串时遇到了困难。
在研究这个问题时,我进行了许多测试。
由于我发现很难找到这些信息,因此我整理了相关信息(如下),这可能对其他寻求有关JavaScript处理包含引用单词的字符串的答案的人有用。
// Sample query: two unquoted words followed by two double-quoted phrases.
let q = 'apple banana "nova scotia" "british columbia"';
// https://dev59.com/iGct5IYBdhLWcg3wFpi1
// [^']+ excludes only apostrophes, so the greedy match runs from the first double quote
// to the last one and both phrases come back as a single element.
const r = q.match(/"([^']+)"/g);
console.log('r:', r)
// r: Array [ "\"nova scotia\" \"british columbia\"" ]
console.log('r:', r.toString())
// r: "nova scotia" "british columbia"
// ----------------------------------------
// [alternate regex] https://www.regextester.com/97161
// The lazy .*? stops at the nearest closing quote, giving one element per phrase
// (quotes included).
const s = q.match(/"(.*?)"/g);
console.log('s:', s)
// s: Array [ "\"nova scotia\"", "\"british columbia\"" ]
console.log('s:', s.toString())
// s: "nova scotia","british columbia"
提取[所有]未引用、引用的单词和短语:
// https://dev59.com/yXE85IYBdhLWcg3wXCIv
// \w+ picks up each unquoted word; "[^"]+" picks up each quoted phrase with its quotes.
const t = q.match(/\w+|"[^"]+"/g);
console.log('t:', t)
// t: Array(4) [ "apple", "banana", "\"nova scotia\"", "\"british columbia\"" ]
console.log('t:', t.toString())
// t: apple,banana,"nova scotia","british columbia"
// ----------------------------------------------------------------------------
// https://dev59.com/yXE85IYBdhLWcg3wXCIv
// [@dallin's answer (this thread)] https://dev59.com/yXE85IYBdhLWcg3wXCIv#18647776
var myRegexp = /[^\s"]+|"([^"]*)"/gi;
var myArray = [];
var match;
/* Each call to exec returns the next regex match as an array, or null when no
 * further match exists. */
while ((match = myRegexp.exec(q)) !== null) { // << "q" = my query (string)
    /* Index 1 in the array is the captured group; it is undefined for unquoted words.
     * Index 0 is the matched text, which is used when no group was captured.
     * Comparing against undefined (not truthiness) keeps an empty quoted string ("")
     * as an empty entry instead of the raw two-quote text. */
    myArray.push(match[1] !== undefined ? match[1] : match[0]);
}
console.log('myArray:', myArray, '| type:', typeof(myArray))
// myArray: Array(4) [ "apple", "banana", "nova scotia", "british columbia" ] | type: object
console.log(myArray.toString())
// apple,banana,nova scotia,british columbia
使用集合(而不是数组):
// https://dev59.com/5F4b5IYBdhLWcg3wiiV4
// Build a Set from the array; a Set keeps only distinct values, in insertion order.
var mySet = new Set(myArray);
console.log('mySet:', mySet, '| type:', typeof(mySet))
// mySet: Set(4) [ "apple", "banana", "nova scotia", "british columbia" ] | type: object
遍历集合元素:
// Log each element of the set on its own line.
mySet.forEach(x => console.log(x));
/* apple
 * banana
 * nova scotia
 * british columbia
 */
// https://dev59.com/2WQo5IYBdhLWcg3wBLUo
// Convert the set back into an array so its elements can be addressed by index.
// Declared with const: a bare assignment would create an implicit global, which is a
// ReferenceError in strict mode and in ES modules.
const myArrayFromSet = Array.from(mySet);
for (let i=0; i < myArrayFromSet.length; i++) {
    console.log(i + ':', myArrayFromSet[i])
}
/*
0: apple
1: banana
2: nova scotia
3: british columbia
*/
侧边栏
上面的 JavaScript 输出来自 Firefox 开发者工具(在网页中按 F12 打开)。我创建了一个空白的 HTML 文件,由它调用一个 .js 文件;我用 Vim 编辑该文件,把它当作我的 IDE。(参见:简单的 JavaScript IDE)
根据我的测试,克隆的集合似乎是深拷贝。(参见:ES6 Map 或 Set 的浅拷贝)
我也注意到了字符的消失。我认为你可以把它们包含进去 - 比如,要包括 "+" 这个词,可以使用类似 "[\w\+]" 而不是只用 "\w"。
'apple banana "nova scotia" "british columbia"'
>>"apple", "banana", "nova scotia", "british columbia"
-- 而且我还学到了一些关于JavaScript的新知识! :-) - Victoria Stuart