一个更现代的编码解决方案:
/**
 * Render a sequence of byte values as a hexadecimal string.
 *
 * Each element is written in base 16 and left-padded with "0" to at least
 * two digits; the pieces are concatenated without a separator.
 *
 * @param {Iterable<number>|ArrayLike<number>} bytes - Byte values to format.
 * @returns {string} The concatenated hex digits.
 */
function bytesToHex(bytes) {
  const digits = [];
  for (const value of Array.from(bytes)) {
    const hex = value.toString(16);
    digits.push(hex.padStart(2, "0"));
  }
  return digits.join("");
}
/**
 * Encode a string as UTF-8 using the platform TextEncoder.
 *
 * @param {string} string - Text to encode.
 * @returns {Uint8Array} The bytes produced by TextEncoder#encode.
 */
function stringToUTF8Bytes(string) {
  const encoder = new TextEncoder();
  const encoded = encoder.encode(string);
  return encoded;
}
/**
 * Encode a string as UTF-16, two bytes per UTF-16 code unit.
 *
 * @param {string} string - Text to encode.
 * @param {boolean} littleEndian - true for little-endian byte order,
 *   false (or omitted) for big-endian.
 * @returns {Uint8Array} A buffer of string.length * 2 bytes.
 */
function stringToUTF16Bytes(string, littleEndian) {
  const bytes = new Uint8Array(string.length * 2);
  const view = new DataView(bytes.buffer);
  for (let i = 0; i !== string.length; i++) {
    // DataView offsets are in bytes, so code unit i lives at byte i * 2.
    view.setUint16(i * 2, string.charCodeAt(i), littleEndian);
  }
  return bytes;
}
/**
 * Encode a string as UTF-32, four bytes per Unicode code point.
 *
 * @param {string} string - Text to encode.
 * @param {boolean} littleEndian - true for little-endian byte order,
 *   false (or omitted) for big-endian.
 * @returns {Uint8Array} A buffer of (number of code points) * 4 bytes.
 */
function stringToUTF32Bytes(string, littleEndian) {
  // Iterating a string yields whole code points, so surrogate pairs
  // collapse into a single entry here.
  const codepoints = Array.from(string, (c) => c.codePointAt(0));
  const bytes = new Uint8Array(codepoints.length * 4);
  const view = new DataView(bytes.buffer);
  for (let i = 0; i !== codepoints.length; i++) {
    // DataView offsets are in bytes, so code point i lives at byte i * 4.
    view.setUint32(i * 4, codepoints[i], littleEndian);
  }
  return bytes;
}
示例:
// Hex dump of the sample string in each encoding. For the UTF-16 and UTF-32
// helpers the second argument is the littleEndian flag: false selects
// big-endian byte order, true selects little-endian.
// NOTE(review): the decoding samples elsewhere in this file carry one more
// code point (U+1F44D) after the trailing space; the literal used here may
// have lost that character — confirm.
bytesToHex(stringToUTF8Bytes("hello 漢字 "))
bytesToHex(stringToUTF16Bytes("hello 漢字 ", false))
bytesToHex(stringToUTF16Bytes("hello 漢字 ", true))
bytesToHex(stringToUTF32Bytes("hello 漢字 ", false))
bytesToHex(stringToUTF32Bytes("hello 漢字 ", true))
对于解码来说,通常要简单得多,你只需要:
/**
 * Parse a string of hexadecimal digits into bytes, two digits per byte.
 *
 * Parsing is lenient: a trailing unpaired digit is ignored, and any pair
 * that parseInt cannot read as hexadecimal is stored as 0.
 *
 * @param {string} hex - Hex digits with no separators or prefix.
 * @returns {Uint8Array} One byte per complete pair of digits.
 */
function hexToBytes(hex) {
  const bytes = new Uint8Array(hex.length / 2);
  for (let i = 0; i !== bytes.length; i++) {
    // slice() replaces the deprecated substr(); both pick digits 2i and 2i+1.
    const start = i * 2;
    bytes[i] = Number.parseInt(hex.slice(start, start + 2), 16);
  }
  return bytes;
}
然后使用 TextDecoder 的编码参数:
// With no label, TextDecoder decodes UTF-8.
new TextDecoder().decode(hexToBytes("68656c6c6f20e6bca2e5ad9720f09f918d"));
// Same text as UTF-16, little-endian and then big-endian; the byte order is
// selected through the encoding label.
new TextDecoder("UTF-16LE").decode(hexToBytes("680065006c006c006f002000226f575b20003dd84ddc"))
new TextDecoder("UTF-16BE").decode(hexToBytes("00680065006c006c006f00206f225b570020d83ddc4d"));
以下是允许使用的编码名称列表:
https://www.w3.org/TR/encoding/#names-and-labels
你可能会注意到UTF-32不在该列表中,这很麻烦,因此:
/**
 * Decode UTF-32 bytes into a string, four bytes per code point.
 *
 * Trailing bytes that do not form a complete 4-byte unit are ignored.
 *
 * @param {Uint8Array} bytes - UTF-32 encoded data. Views onto a larger
 *   buffer (e.g. the result of subarray()) are decoded from their own
 *   offset and length.
 * @param {boolean} littleEndian - true if the data is little-endian,
 *   false (or omitted) for big-endian.
 * @returns {string} The decoded text.
 * @throws {RangeError} If a value is not a valid code point
 *   (raised by String.fromCodePoint).
 */
function bytesToStringUTF32(bytes, littleEndian) {
  // Restrict the view to the typed array's own window; bytes.buffer alone
  // may be larger than the data the caller handed in.
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  const total = Math.floor(view.byteLength / 4);

  // Decode in bounded chunks: spreading an arbitrarily long array into one
  // call can exceed the engine's argument limit.
  const CHUNK_SIZE = 8192;
  const parts = [];
  for (let start = 0; start < total; start += CHUNK_SIZE) {
    const end = Math.min(start + CHUNK_SIZE, total);
    const codepoints = new Uint32Array(end - start);
    for (let i = start; i !== end; i++) {
      codepoints[i - start] = view.getUint32(i * 4, littleEndian);
    }
    parts.push(String.fromCodePoint(...codepoints));
  }
  return parts.join("");
}
然后:
// Decode the same text from UTF-32: the first call passes littleEndian=false
// (big-endian input), the second passes true (little-endian input).
bytesToStringUTF32(hexToBytes("00000068000000650000006c0000006c0000006f0000002000006f2200005b57000000200001f44d"), false)
bytesToStringUTF32(hexToBytes("68000000650000006c0000006c0000006f00000020000000226f0000575b0000200000004df40100"), true)
将 ("000"+hex).slice(-4) 更改为 "\\u" + ("000"+hex).slice(-4)。表达式 "\u6f22\u5b57" === "漢字"
的求值结果为 true,因为在代码解析后它们是相同的。 - McDowellvar a = "\\x73\\x75\\x62\\x73\\x74\\x72"; var str = "\\u6f22\\u5b57"; String.prototype.hexDecode = function(){ var j; var hexes = this.split("\\"); var back = ""; for(j = 1; j
- martian17