Javascript在webkit/safari中无法对UTF8字符串进行Base64编码

7
我试图对一个包含泰语字符的utf8字符串进行base64编码。我使用浏览器内置的btoa函数。它可以处理ascii文本,但是当涉及到泰语时,它会抛出一个INVALID_CHARACTER_ERR: DOM Exception 5异常。

以下是一个失败的示例(看起来像“n”的字符是泰语)

btoa('aก')


如果要对非ascii字符串进行base64编码,我需要做什么?

4个回答

9
var Base64 = {
  encode: function(s) {
    return btoa(unescape(encodeURIComponent(s)));
  },
  decode: function(s) {
    return decodeURIComponent(escape(atob(s)));
  }
};

嗯,我应该在这里说,escape函数将被删除。 请分享一个不使用escape的解决方案。 - Amir Hossein Baghernezad

2
我知道这已经很老了,但我最近也在寻找UTF8到Base64编码器。我在http://www.webtoolkit.info/javascript-base64.html找到了一个方便的小脚本,并在http://jsbase64.codeplex.com/找到了一个性能改进的版本。
以下是脚本内容:
var B64 = {
    alphabet: 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=',
    lookup: null,
    ie: /MSIE /.test(navigator.userAgent),
    ieo: /MSIE [67]/.test(navigator.userAgent),
    encode: function (s) {
        var buffer = B64.toUtf8(s),
            position = -1,
            len = buffer.length,
            nan0, nan1, nan2, enc = [, , , ];
        if (B64.ie) {
            var result = [];
            while (++position < len) {
                nan0 = buffer[position];
                nan1 = buffer[++position];
                enc[0] = nan0 >> 2;
                enc[1] = ((nan0 & 3) << 4) | (nan1 >> 4);
                if (isNaN(nan1))
                    enc[2] = enc[3] = 64;
                else {
                    nan2 = buffer[++position];
                    enc[2] = ((nan1 & 15) << 2) | (nan2 >> 6);
                    enc[3] = (isNaN(nan2)) ? 64 : nan2 & 63;
                }
                result.push(B64.alphabet.charAt(enc[0]), B64.alphabet.charAt(enc[1]), B64.alphabet.charAt(enc[2]), B64.alphabet.charAt(enc[3]));
            }
            return result.join('');
        } else {
            var result = '';
            while (++position < len) {
                nan0 = buffer[position];
                nan1 = buffer[++position];
                enc[0] = nan0 >> 2;
                enc[1] = ((nan0 & 3) << 4) | (nan1 >> 4);
                if (isNaN(nan1))
                    enc[2] = enc[3] = 64;
                else {
                    nan2 = buffer[++position];
                    enc[2] = ((nan1 & 15) << 2) | (nan2 >> 6);
                    enc[3] = (isNaN(nan2)) ? 64 : nan2 & 63;
                }
                result += B64.alphabet[enc[0]] + B64.alphabet[enc[1]] + B64.alphabet[enc[2]] + B64.alphabet[enc[3]];
            }
            return result;
        }
    },
    decode: function (s) {
        if (s.length % 4)
            throw new Error("InvalidCharacterError: 'B64.decode' failed: The string to be decoded is not correctly encoded.");
        var buffer = B64.fromUtf8(s),
            position = 0,
            len = buffer.length;
        if (B64.ieo) {
            var result = [];
            while (position < len) {
                if (buffer[position] < 128)
                    result.push(String.fromCharCode(buffer[position++]));
                else if (buffer[position] > 191 && buffer[position] < 224)
                    result.push(String.fromCharCode(((buffer[position++] & 31) << 6) | (buffer[position++] & 63)));
                else
                    result.push(String.fromCharCode(((buffer[position++] & 15) << 12) | ((buffer[position++] & 63) << 6) | (buffer[position++] & 63)));
            }
            return result.join('');
        } else {
            var result = '';
            while (position < len) {
                if (buffer[position] < 128)
                    result += String.fromCharCode(buffer[position++]);
                else if (buffer[position] > 191 && buffer[position] < 224)
                    result += String.fromCharCode(((buffer[position++] & 31) << 6) | (buffer[position++] & 63));
                else
                    result += String.fromCharCode(((buffer[position++] & 15) << 12) | ((buffer[position++] & 63) << 6) | (buffer[position++] & 63));
            }
            return result;
        }
    },
    toUtf8: function (s) {
        var position = -1,
            len = s.length,
            chr, buffer = [];
        if (/^[\x00-\x7f]*$/.test(s)) while (++position < len)
            buffer.push(s.charCodeAt(position));
        else while (++position < len) {
            chr = s.charCodeAt(position);
            if (chr < 128)
                buffer.push(chr);
            else if (chr < 2048)
                buffer.push((chr >> 6) | 192, (chr & 63) | 128);
            else
                buffer.push((chr >> 12) | 224, ((chr >> 6) & 63) | 128, (chr & 63) | 128);
        }
        return buffer;
    },
    fromUtf8: function (s) {
        var position = -1,
            len, buffer = [],
            enc = [, , , ];
        if (!B64.lookup) {
            len = B64.alphabet.length;
            B64.lookup = {};
            while (++position < len)
                B64.lookup[B64.alphabet.charAt(position)] = position;
            position = -1;
        }
        len = s.length;
        while (++position < len) {
            enc[0] = B64.lookup[s.charAt(position)];
            enc[1] = B64.lookup[s.charAt(++position)];
            buffer.push((enc[0] << 2) | (enc[1] >> 4));
            enc[2] = B64.lookup[s.charAt(++position)];
            if (enc[2] == 64)
                break;
            buffer.push(((enc[1] & 15) << 4) | (enc[2] >> 2));
            enc[3] = B64.lookup[s.charAt(++position)];
            if (enc[3] == 64)
                break;
            buffer.push(((enc[2] & 3) << 6) | enc[3]);
        }
        return buffer;
    }
};

免责声明:我没有特别针对泰文字符进行测试,但假设它会起作用。

Sav


2

很不幸,btoa/atob在任何标准中都没有被指定,但是firefox和webkit中的实现都无法处理多字节字符,因此即使它们现在被指定,这些内置函数也无法支持多字节字符(因为输入和输出字符串必然会改变)。

看来你唯一的选择是编写自己的base64编码+解码例程。


2

1
或者更具体地说,如果你想复制/粘贴完整的解决方案,请访问:https://developer.mozilla.org/en/DOM/window.btoa#Unicode_Strings(如果你真的对它的工作原理和原因感到好奇,Monsur Hossain在http://monsur.hossa.in/2012/07/20/utf-8-in-javascript.html上有一个很好的深入解释。) - ecmanaut

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接