如果你想在 node.js 中使用原始字节,有一个专门的模块(Buffer)可以做到。如果你是真正的高手,也可以只用 JavaScript 字符串来实现,但这样更困难,效率也远低于使用该模块。
// Allocate a single zero-filled byte. The `new Buffer(size)` constructor is
// deprecated in Node.js; Buffer.alloc() is the supported replacement.
var b = Buffer.alloc(1);
b[0] = 213;
// 0xD5 on its own is not a valid UTF-8 sequence, so toString() (UTF-8 by
// default) produces the replacement character U+FFFD.
console.log(b.toString());
// Build the three bytes E2 98 85 directly. The `new Buffer(size)` constructor
// is deprecated in Node.js; Buffer.from() is the supported replacement.
var b = Buffer.from([0xE2, 0x98, 0x85]);
// Decoded as UTF-8 (the default for toString()), these bytes are U+2605.
console.log(b.toString());
print chr(213) # shows � on the console
这段代码打印了一个原始字节(0xD5),它在 UTF-8 下是无效的字节序列,因此显示为替换字符(�)。
这里的UTF-8解释不相关,您可能只想要原始字节。
要在 JavaScript 中创建原始字节,可以使用 Uint8Array。
// A one-element typed array holding the raw byte 213 (0xD5).
var a = new Uint8Array([213]);
您可以选择将原始字节解释为utf-8格式:
// Interpret the raw bytes in `a` as UTF-8 and print the resulting string.
console.log(utf8decode(a));
/**
 * Decode a sequence of UTF-8 bytes into a JavaScript string.
 *
 * Malformed input never throws; it is replaced with U+FFFD:
 *   - a lead byte whose sequence is truncated, or whose continuation bytes
 *     are not of the form 10xxxxxx, yields one U+FFFD and decoding resumes
 *     at the very next byte;
 *   - a structurally complete sequence that encodes an overlong form, a
 *     surrogate (U+D800..U+DFFF) or a value above U+10FFFF yields one
 *     U+FFFD per byte of that sequence;
 *   - a stray continuation byte or an invalid lead byte yields one U+FFFD.
 *
 * @param {Uint8Array|number[]} uint8array - Bytes to decode (indexable, with
 *     a `length`).
 * @returns {string} The decoded text.
 */
function utf8decode(uint8array) {
    var REPLACEMENT = 0xFFFD;
    // Convert in slices so Function.prototype.apply never receives more
    // arguments than the engine allows for very large inputs.
    var CHUNK_SIZE = 0x2000;
    var units = []; // UTF-16 code units of the result
    var len = uint8array.length;
    var i, byte, codePoint, offset, result;

    // True when the byte at `index` is a continuation byte (10xxxxxx).
    function isContinuation(index) {
        return (uint8array[index] & 0xC0) === 0x80;
    }

    for (i = 0; i < len; ++i) {
        byte = uint8array[i];
        if ((byte & 0x80) === 0x00) {
            // Single-byte (ASCII) sequence.
            units.push(byte & 0x7F);
        } else if ((byte & 0xF8) === 0xF0 && len > i + 3 &&
                isContinuation(i + 1) && isContinuation(i + 2) && isContinuation(i + 3)) {
            codePoint = ((byte & 0x7) << 18) |
                ((uint8array[i + 1] & 0x3F) << 12) |
                ((uint8array[i + 2] & 0x3F) << 6) |
                (uint8array[i + 3] & 0x3F);
            i += 3;
            if (codePoint > 0xFFFF && codePoint <= 0x10FFFF) {
                // String.fromCharCode only takes 16-bit units, so astral
                // code points must be emitted as a surrogate pair.
                offset = codePoint - 0x10000;
                units.push(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
            } else {
                units.push(REPLACEMENT, REPLACEMENT, REPLACEMENT, REPLACEMENT);
            }
        } else if ((byte & 0xF0) === 0xE0 && len > i + 2 &&
                isContinuation(i + 1) && isContinuation(i + 2)) {
            codePoint = ((byte & 0xF) << 12) |
                ((uint8array[i + 1] & 0x3F) << 6) |
                (uint8array[i + 2] & 0x3F);
            i += 2;
            // Reject overlong forms and UTF-8-encoded surrogates.
            if (codePoint > 0x7FF && codePoint <= 0xFFFF &&
                    !(codePoint >= 0xD800 && codePoint <= 0xDFFF)) {
                units.push(codePoint);
            } else {
                units.push(REPLACEMENT, REPLACEMENT, REPLACEMENT);
            }
        } else if ((byte & 0xE0) === 0xC0 && len > i + 1 && isContinuation(i + 1)) {
            codePoint = ((byte & 0x1F) << 6) | (uint8array[i + 1] & 0x3F);
            i += 1;
            if (codePoint > 0x7F && codePoint <= 0x7FF) {
                units.push(codePoint);
            } else {
                units.push(REPLACEMENT, REPLACEMENT);
            }
        } else {
            // Stray continuation byte, invalid lead byte, or a lead byte
            // whose sequence is truncated or malformed.
            units.push(REPLACEMENT);
        }
    }

    result = "";
    for (i = 0; i < units.length; i += CHUNK_SIZE) {
        result += String.fromCharCode.apply(String, units.slice(i, i + CHUNK_SIZE));
    }
    return result;
}
你最有可能要做的事情与试图将字节解释为utf8无关。
另一个例子:
// UTF-8 encoding of the black star U+2605 (★): E2 98 85
var a = new Uint8Array([0xE2, 0x98, 0x85]);
utf8decode(a) === String.fromCharCode(0x2605); // true
utf8decode(a); // ★
在Python 2.7(Ubuntu)中:
# Python 2: emit the bytes E2 98 85, i.e. the UTF-8 encoding of U+2605.
print chr(0xE2) + chr(0x98) + chr(0x85)
#prints ★
String.fromCharCode 无法满足您的需求,您能否补充更多说明?如果您只是想在控制台中显示 Unicode 替换字符,可以使用 String.fromCharCode(0xFFFD)。 - Esailija