在一个字符串中分离整数和文本

17

我有一个字符串,例如fullData1到fullData10,我需要将整数和文本部分分开。我该如何使用JavaScript实现此功能。


1
字符串是什么?是"fullData1到fullData10"这一整个字符串,还是从fullData1到fullData10的所有字符串? - Felix Kling
1
字符数据会一直保持相同吗? - Simon Gill
3个回答

30

按整数将字符串拆分为数组:

myArray = datastring.split(/([0-9]+)/)

然后,myArray的第一个元素将类似于fullData,第二个元素将是某个数字,如1或10。

如果您的字符串是fullData10foo,则会得到一个数组['fullData', '10', 'foo'](split返回的元素都是字符串)。

您也可以使用以下方法:

  • .split(/(?=\d+)/) 生成["fullData", "1", "0"]

  • .split(/(\d+)/) 生成 ["fullData", "10", ""]

  • 此外,使用.filter(Boolean) 来去除任何空字符串("")


7
这将移除整数,只剩下2个组成部分,即fullData字符串和一个空字符串。可以改用正向先行断言,例如.split(/(?=\d+)/)。编辑:你提供的额外信息使得这条评论不太准确,但要点仍然成立——split会删除用于拆分的那部分字符串。 - Andy E
9
通过进行微小的调整,您可以获得在数组中返回任何长度字符串的任何长度数字。.split(/(\d+)/)将产生["fullData", "10", ""],而上面的评论将产生["fullData", "1", "0"],而这个答案将产生["fullData", ""] - Joel Worsham
8
加上.filter(Boolean)可以让结果更加简洁。使用.split(/(\d+)/).filter(Boolean)将得到["fullData", "10"] - Joel Worsham
-1,因为无论是Chrome,Firefox还是IE都没有返回关于答案所提供的示例输入的承诺结果。另外,数组使用方括号而不是花括号。+1给Joel Worsham,他的解决方案完美地解决了问题! - CodeManX
公平的评论,我不会回溯编辑答案并隐藏错误。请查看其他答案。 - Aiden Bell

5

简述

如果您的JS环境支持RegExp sticky flag,请使用它以获得最佳性能。

基准测试

以下是8种不同的实现方法,用于将数字与其他字符分开:

/**
 * Splits a string into alternating text and number tokens.
 *
 * The string is split on runs of decimal digits with a capturing group, so
 * the resulting pieces alternate: even indexes hold non-digit text, odd
 * indexes hold the captured digit runs. Empty pieces are dropped and digit
 * runs are converted to numbers with unary plus.
 *
 * @param {string} str - The input string.
 * @returns {Array<string|number>} Tokens in their original order.
 */
function naturalSplit(str) {
    'use strict';
    const arr = [];
    const split = str.split(/(\d+)/);
    // Use an index loop rather than for...in: for...in also visits enumerable
    // properties inherited from Array.prototype, which would end up in the
    // result as bogus tokens.
    for (let i = 0; i < split.length; i++) {
        const s = split[i];
        if (s === "") {
            continue;
        }
        // Odd indexes are the captured digit runs.
        arr.push(i % 2 ? +s : s);
    }
    return arr;
}

/**
 * Tokenizes a string into text pieces and numbers using split/map/filter.
 *
 * @param {string} str - The input string.
 * @returns {Array<string|number>} Non-empty text pieces and numeric values,
 *     in their original order.
 */
function naturalSplit2(str) {
    'use strict';
    // Splitting on a capturing group keeps the digit runs at odd positions.
    const pieces = str.split(/(\d+)/);
    const converted = pieces.map(function (piece, position) {
        const isDigitRun = position % 2;
        return isDigitRun ? +piece : piece;
    });
    return converted.filter(function (piece) {
        return piece !== "";
    });
}

/**
 * Tokenizes a string into text pieces and numbers; digit runs are converted
 * with the unary plus operator.
 *
 * @param {string} str - The input string.
 * @returns {Array<string|number>} Non-empty text pieces and numeric values,
 *     in their original order.
 */
function naturalSplitMapFilterUnaryPlus(str) {
    'use strict';
    // Odd positions of the split result are the captured digit runs.
    const toToken = (part, pos) => {
        if (pos % 2) {
            return +part;
        }
        return part;
    };
    const isNotEmpty = (token) => token !== "";
    return str.split(/(\d+)/).map(toToken).filter(isNotEmpty);
}

/**
 * Tokenizes a string into text pieces and numbers; digit runs are converted
 * with Number().
 *
 * @param {string} str - The input string.
 * @returns {Array<string|number>} Non-empty text pieces and numeric values,
 *     in their original order.
 */
function naturalSplitMapFilterNumber(str) {
    'use strict';
    const parts = str.split(/(\d+)/);
    // Odd positions of the split result are the captured digit runs.
    const tokens = parts.map((part, pos) => {
        return pos % 2 ? Number(part) : part;
    });
    return tokens.filter((token) => token !== "");
}

/**
 * Tokenizes a string into numbers and text by scanning it character by
 * character and building each token through string concatenation.
 *
 * @param {string} str - The input string.
 * @returns {Array<string|number>} Digit runs as numbers and the text between
 *     them as strings, in their original order.
 */
function naturalConcat(str) {
    'use strict';
    const isDigit = (ch) => ch >= "0" && ch <= "9";
    const tokens = [];
    const end = str.length;
    let pos = 0;
    while (pos < end) {
        // Collect a (possibly empty) run of digits.
        let digits = "";
        for (; pos < end && isDigit(str[pos]); pos++) {
            digits += str[pos];
        }
        if (digits) {
            tokens.push(Number(digits));
        }
        // Collect the (possibly empty) run of non-digits that follows.
        let text = "";
        for (; pos < end && !isDigit(str[pos]); pos++) {
            text += str[pos];
        }
        if (text) {
            tokens.push(text);
        }
    }
    return tokens;
}

/**
 * Tokenizes a string into text and numbers by repeatedly matching a leading
 * "text, digits, rest" pattern against the remaining input.
 *
 * @param {string} str - The input string.
 * @returns {Array<string|number>} Text pieces as strings and digit runs as
 *     numbers, in their original order.
 */
function naturalMatch(str) {
    'use strict';
    const arr = [];
    // The tail group uses [\s\S]* instead of .* because "." does not match
    // line terminators; with .* any input whose remainder contains a newline
    // failed to match and the tokens were silently lost.
    const num_re = /^(\D+)?(\d+)?([\s\S]*)$/;
    let s = str;
    while (s) {
        // Every group is optional and the tail accepts any character, so this
        // pattern matches every string; no null check is needed.
        const match = s.match(num_re);
        if (match[1]) {
            arr.push(match[1]);
        }
        if (match[2]) {
            arr.push(Number(match[2]));
        }
        // Continue with whatever follows the consumed text/digits.
        s = match[3];
    }
    return arr;
}

/**
 * Tokenizes a string into text and numbers with a sticky regular expression
 * that consumes one "text, digits" pair per exec() call.
 *
 * @param {string} str - The input string.
 * @returns {Array<string|number>} Text pieces as strings and digit runs as
 *     numbers, in their original order.
 */
function naturalExecSticky(str) {
    'use strict';
    const tokens = [];
    // Created per call, so lastIndex always starts at 0.
    const tokenRe = /(\D+)?(\d+)?/y;
    for (;;) {
        const found = tokenRe.exec(str);
        const text = found[1];
        const digits = found[2];
        if (text !== undefined) {
            tokens.push(text);
        }
        if (digits !== undefined) {
            tokens.push(Number(digits));
        }
        // An empty overall match means nothing is left to consume.
        if (!found[0]) {
            break;
        }
    }
    return tokens;
}

/**
 * Tokenizes a string into numbers and text by locating the boundaries of each
 * digit / non-digit run and cutting the run out of the input in one piece.
 *
 * @param {string} str - The input string.
 * @returns {Array<string|number>} Digit runs as numbers and the text between
 *     them as strings, in their original order.
 */
function naturalSlice(str) {
    'use strict';
    const isDigit = (ch) => ch >= "0" && ch <= "9";
    const tokens = [];
    const end = str.length;
    let start = 0;
    while (start < end) {
        // Advance over a (possibly empty) run of digits.
        let stop = start;
        while (stop < end && isDigit(str[stop])) {
            stop++;
        }
        if (stop > start) {
            tokens.push(Number(str.slice(start, stop)));
            start = stop;
        }
        // Advance over the (possibly empty) run of non-digits that follows.
        while (stop < end && !isDigit(str[stop])) {
            stop++;
        }
        if (stop > start) {
            tokens.push(str.slice(start, stop));
            start = stop;
        }
    }
    return tokens;
}

// Every tokenizer implementation to benchmark. The benchmark below runs them
// in this order and labels each timer with the function's name.
const algorithms = [
    naturalSplit,
    naturalSplit2,
    naturalSplitMapFilterUnaryPlus,
    naturalSplitMapFilterNumber,
    naturalConcat,
    naturalSlice,
    naturalMatch,
    naturalExecSticky
];

// Benchmark driver: times every function in `algorithms` against a set of
// fixed inputs and against a large batch of random strings, reporting the
// timings through console.time / console.timeEnd.
(function(){
    'use strict';

    // 100000 random inputs: the base-36 rendering of a random fraction with
    // its first two characters (the "0." prefix) sliced off.
    let randomTests = [];
    for (let i = 0; i < 100000; i++) {
        randomTests.push({str: Math.random().toString(36).slice(2)});
    }

    // Fixed inputs. NOTE(review): the `expect` arrays are never compared with
    // the functions' results anywhere in this block; only timing is measured.
    const tests = [
        {str: "112233", expect: [112233]},
        {str: "foo bar baz", expect: ["foo bar baz"]},
        {str: "foo11bar22baz", expect: ["foo", 11, "bar", 22, "baz"]},
        {str: "11foo22bar33baz", expect: [11, "foo", 22, "bar", 33, "baz"]},
        {str: "foo11bar22baz33", expect: ["foo", 11, "bar", 22, "baz", 33]},
        {str: "11foo22bar33baz44", expect: [11, "foo", 22, "bar", 33, "baz", 44]},
        {str: "", expect: []},
        //{str: "99999999999999999999999999999999999999999999999999999999999999999999999999999999999", expect: ""}, // number too large for JS = ?
        {str: "Li Europan 0234 lingues es membres del sam familie. Lor separat existentie es un myth. Por scientie, musica, sport etc, litot Europa usa li sam vocabular. Li lingues differe solmen in li 0.00 grammatica, -1e5 li pronunciation e li plu commun vocabules. Omnicos directe al desirabilite de un nov lingua franca: On refusa continuar payar custosi traductores. At solmen va 8esser necessi far uniform grammatica, pronunciation 025.35 e plu sommun paroles. Ma +234234 quande lingues coalesce, li grammatica del resultant lingue es plu simplic e 432 regulari quam ti del coalescent9 lingues. Li nov 90548 lingua franca va esser plu simplic e 23453 regulari quam li existent 234898234 Europan lingues. It va esser tam simplic23423452349819879234quam Occidental in fact, it va esser Occidental. A un Angleso it va semblar un simplificat Angles, quam un skeptic 89723894 Cambridge amico dit me que Occidental es.Li Europan lingues es membres del sam familie. Lor separat existentie es un myth. Por scientie, musica, sport etc, litot Europa usa li sam vocabular. Li 3,4,5,6,7,8 lingues differe solmen in li grammatica, li 495 pronunciation e li plu commun -45345 vocabules. Omnicos directe al desirabilite de un nov lingua franca: On refusa continuar payar custosi traductores. At solmen va esser necessi far uniform grammatica, pronunciation e plu sommun paroles.",
        expect: ["Li Europan ", 234, " lingues es membres del sam familie. Lor separat existentie es un myth. Por scientie, musica, sport etc, litot Europa usa li sam vocabular. Li lingues differe solmen in li ", 0, ".", 0, " grammatica, -", 1, "e", 5, " li pronunciation e li plu commun vocabules. Omnicos directe al desirabilite de un nov lingua franca: On refusa continuar payar custosi traductores. At solmen va ", 8, "esser necessi far uniform grammatica, pronunciation ", 25, ".", 35, " e plu sommun paroles. Ma +", 234234, " quande lingues coalesce, li grammatica del resultant lingue es plu simplic e ", 432, " regulari quam ti del coalescent", 9, " lingues. Li nov ", 90548, " lingua franca va esser plu simplic e ", 23453, " regulari quam li existent ", 234898234, " Europan lingues. It va esser tam simplic", 23423452349819879234, "quam Occidental in fact, it va esser Occidental. A un Angleso it va semblar un simplificat Angles, quam un skeptic ", 89723894, " Cambridge amico dit me que Occidental es.Li Europan lingues es membres del sam familie. Lor separat existentie es un myth. Por scientie, musica, sport etc, litot Europa usa li sam vocabular. Li ", 3, ",", 4, ",", 5, ",", 6, ",", 7, ",", 8, " lingues differe solmen in li grammatica, li ", 495, " pronunciation e li plu commun -", 45345, " vocabules. Omnicos directe al desirabilite de un nov lingua franca: On refusa continuar payar custosi traductores. At solmen va esser necessi far uniform grammatica, pronunciation e plu sommun paroles."]}
    ];

    // Fixed inputs: each algorithm is called 1000 times per input, and the
    // total for those 1000 calls is reported under the function's name.
    for (let t of tests) {
        // Only the first 20 characters of the input are shown in the heading.
        console.log('\nTest "' + t.str.slice(0, 20) + '"');
        for (let f of algorithms) {
            console.time(f.name);
            for (let i = 0; i < 1000; i++) {
                // The result is assigned but intentionally not inspected.
                let result = f(t.str);
            }
            console.timeEnd(f.name);
        }
    }
    // Random inputs: each algorithm processes the whole batch once.
    console.log('\nRandom tests')
    for (let f of algorithms) {
        console.time(f.name);
        for (let r of randomTests) {
            let result = f(r.str);
        }
        console.timeEnd(f.name);
    }
})();

我的测试结果

使用带有--harmony_regexps --regexp-optimization的NodeJS 5.11.0:

Test "112233"
naturalSplit: 2.817ms
naturalSplit2: 3.033ms
naturalSplitMapFilterUnaryPlus: 3.199ms
naturalSplitMapFilterNumber: 1.910ms
naturalConcat: 0.876ms
naturalSlice: 1.274ms
naturalMatch: 0.960ms
naturalExecSticky: 0.863ms

Test "foo bar baz"
naturalSplit: 1.072ms
naturalSplit2: 0.839ms
naturalSplitMapFilterUnaryPlus: 0.800ms
naturalSplitMapFilterNumber: 0.802ms
naturalConcat: 0.952ms
naturalSlice: 0.697ms
naturalMatch: 0.577ms
naturalExecSticky: 1.329ms

Test "foo11bar22baz"
naturalSplit: 3.410ms
naturalSplit2: 2.398ms
naturalSplitMapFilterUnaryPlus: 2.083ms
naturalSplitMapFilterNumber: 6.107ms
naturalConcat: 1.627ms
naturalSlice: 1.633ms
naturalMatch: 2.070ms
naturalExecSticky: 1.697ms

Test "11foo22bar33baz"
naturalSplit: 3.572ms
naturalSplit2: 2.805ms
naturalSplitMapFilterUnaryPlus: 2.691ms
naturalSplitMapFilterNumber: 2.570ms
naturalConcat: 1.990ms
naturalSlice: 1.983ms
naturalMatch: 2.474ms
naturalExecSticky: 1.591ms

Test "foo11bar22baz33"
naturalSplit: 3.439ms
naturalSplit2: 2.637ms
naturalSplitMapFilterUnaryPlus: 2.613ms
naturalSplitMapFilterNumber: 4.554ms
naturalConcat: 1.958ms
naturalSlice: 2.002ms
naturalMatch: 0.686ms
naturalExecSticky: 0.792ms

Test "11foo22bar33baz44"
naturalSplit: 3.916ms
naturalSplit2: 2.824ms
naturalSplitMapFilterUnaryPlus: 2.843ms
naturalSplitMapFilterNumber: 2.685ms
naturalConcat: 2.164ms
naturalSlice: 2.246ms
naturalMatch: 0.981ms
naturalExecSticky: 0.961ms

Test ""
naturalSplit: 1.579ms
naturalSplit2: 2.993ms
naturalSplitMapFilterUnaryPlus: 1.356ms
naturalSplitMapFilterNumber: 1.201ms
naturalConcat: 0.029ms
naturalSlice: 0.029ms
naturalMatch: 0.025ms
naturalExecSticky: 0.186ms

Test "Li Europan 0234 ling"
naturalSplit: 25.771ms
naturalSplit2: 14.735ms
naturalSplitMapFilterUnaryPlus: 14.905ms
naturalSplitMapFilterNumber: 13.707ms
naturalConcat: 90.956ms
naturalSlice: 54.905ms
naturalMatch: 20.436ms
naturalExecSticky: 5.915ms

Random tests
naturalSplit: 376.622ms
naturalSplit2: 293.722ms
naturalSplitMapFilterUnaryPlus: 286.914ms
naturalSplitMapFilterNumber: 281.534ms
naturalConcat: 234.996ms
naturalSlice: 233.745ms
naturalMatch: 100.181ms
naturalExecSticky: 100.647ms

naturalMatch 明显比其他大多数函数快;唯一的例外是 naturalExecSticky,它与 naturalMatch 速度相当,并且在长字符串输入时有时甚至更快(约4倍)。

顺便提一下:这些函数被称为 natural...,因为它们的结果对于自然排序很有用("file10" 排在 "file2" 之后,而不是像按字母顺序排序那样排在 "file1" 之后)。

完美的答案,感谢您抽出时间写下它。 - Darren Street

2

如果字符部分的长度是固定的,您可以使用子字符串方法轻松地将它们删除。


网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接