sudo apt install locales
sudo locale-gen en_US en_US.UTF-8
sudo dpkg-reconfigure locales
如果你只想在文件读取
/写入
时获得稳定的UTF-8
支持,而不需要在每个地方都进行相同的声明,这里有两个解决方案:
1. 在运行时修补io
模块(自担风险的危险操作)
import pathlib as pathlib
import tempfile
import chardet
def patchIOWithUtf8Default():
import builtins
import importlib.util
import sys
spec = importlib.util.find_spec("io")
module = importlib.util.module_from_spec(spec)
exec(compile(spec.loader.get_source(spec.name) + """
def open(*args, **kwargs):
args = list(args)
mode = kwargs.get('mode', (args + [''])[1])
if (len(args) < 4 and 'b' not in mode) or 'encoding' in kwargs:
kwargs['encoding'] = 'utf8'
elif len(args) >= 4 and args[3] is None:
args[3] = 'utf8'
return _io.open(*args, **kwargs)
""", module.__spec__.origin, "exec"), module.__dict__)
sys.modules[module.__name__] = module
builtins.open = __import__("io").open
importlib.reload(importlib.import_module("pathlib"))
def main():
patchIOWithUtf8Default()
filename = tempfile.mktemp()
text = "Common\n常\nSense\n识\n天地玄黄"
print("Original text:", repr(text))
pathlib.Path(filename).write_text(text)
encoding = chardet.detect(open(filename, mode="rb").read())["encoding"]
print("Written encoding by pathlib:", encoding)
print("Written text by pathlib:", repr(open(filename, newline="", encoding=encoding).read()))
if __name__ == '__main__':
main()
示例输出:
Original text: 'Common\n常\nSense\n识\n天地玄黄'
Written encoding by pathlib: utf-8
Written text by pathlib: 'Common\r\n常\r\nSense\r\n识\r\n天地玄黄'
2. 使用第三方库作为pathlib的包装器
https://github.com/baijifeilong/IceSpringPathLib
pip install IceSpringPathLib
import pathlib
import tempfile
import chardet
import IceSpringPathLib
tempfile.mktemp()
filename = tempfile.mktemp()
text = "Common\n常\nSense\n识\n天地玄黄"
print("Original text:", repr(text))
pathlib.Path(filename).write_text(text)
encoding = chardet.detect(open(filename, mode="rb").read())["encoding"]
print("\nWritten text by pathlib:", repr(open(filename, newline="", encoding=encoding).read()))
print("Written encoding by pathlib:", encoding)
IceSpringPathLib.Path(filename).write_text(text)
encoding = chardet.detect(open(filename, mode="rb").read())["encoding"]
print("\nWritten text by IceSpringPathLib:", repr(open(filename, newline="", encoding=encoding).read()))
print("Written encoding by IceSpringPathLib:", encoding)
示例输出:
Original text: 'Common\n常\nSense\n识\n天地玄黄'
Written text by pathlib: 'Common\r\n常\r\nSense\r\n识\r\n天地玄黄'
Written encoding by pathlib: GB2312
Written text by IceSpringPathLib: 'Common\n常\nSense\n识\n天地玄黄'
Written encoding by IceSpringPathLib: utf-8
将操作系统的默认编码设置为UTF-8
。
例如,在Ubuntu上编辑文件/etc/default/locale
并设置:
LANG=en_US.UTF-8
LANGUAGE=en_US.UTF-8
LC_ALL=en_US.UTF-8
这个对我解决了问题。
import os
os.environ["PYTHONIOENCODING"] = "utf-8"