更新至Swift 3
字符串和字符
对于今后访问此问题的几乎所有人来说,`String` 和 `Character` 就是你需要的答案。
直接在代码中设置Unicode值:
// String literals may contain arbitrary Unicode text directly.
var str: String = "I want to visit 北京, Москва, मुंबई, القاهرة, and 서울시. "
// A Character literal must hold exactly one extended grapheme cluster;
// an empty literal ("") is rejected by the compiler.
var character: Character = "🌍"
使用十六进制设置值
// Scalars U+0061, U+5927, U+1F34E and U+03C0 spell "a大🍎π".
var str = "\u{61}\u{5927}\u{1F34E}\u{3C0}"
// "e" (U+0065) followed by the combining acute accent (U+0301) forms the
// single Character "é".
var character: Character = "\u{65}\u{301}"
请注意,Swift字符可以由多个Unicode代码点组成,但看起来是一个单独的字符。这称为扩展字形集群。
另请参见此问题。
转换为Unicode值:
// Encoded views of a String
str.utf8 // UTF-8 code units
str.utf16 // UTF-16 code units
str.unicodeScalars // Unicode scalar values (UTF-32)
// The same views for a Character, reached by converting it to a String first
String(character).utf8
String(character).utf16
String(character).unicodeScalars
从Unicode十六进制值转换:
// U+1F34E as a raw 32-bit value.
let hexValue = UInt32(0x1F34E)
// UnicodeScalar(_: UInt32) is failable, so leave early when the value is
// not a valid Unicode scalar.
guard let scalarValue = UnicodeScalar(hexValue) else { return }
let myString = String(scalarValue)
或者也可以:
// U+1F34E as a raw 32-bit value.
let hexValue = UInt32(0x1F34E)
// Build the string only when the value is a valid Unicode scalar;
// `myString` is scoped to the `if` body.
if let scalarValue = UnicodeScalar(hexValue) {
    let myString = String(scalarValue)
}
更多例子
// More examples: building strings from integer scalar values.
let value0: UInt8 = 0x61
let value1: UInt16 = 0x5927
let value2: UInt32 = 0x1F34E

// Every UInt8 is a valid scalar, so this initializer cannot fail.
let string0 = String(UnicodeScalar(value0)) // a
// The UInt16 and UInt32 initializers are failable (surrogates and values
// above U+10FFFF are rejected), so the optional has to be unwrapped before a
// String can be built; an invalid value yields an empty string here.
let string1 = UnicodeScalar(value1).map { String($0) } ?? "" // 大
let string2 = UnicodeScalar(value2).map { String($0) } ?? "" // 🍎

// Convert an array of hex values to a String.
let myHexArray = [0x43, 0x61, 0x74, 0x203C, 0x1F431] // an Int array
var myString = ""
for hexValue in myHexArray {
    // UnicodeScalar(_: Int) is failable as well; skip values that are not
    // valid Unicode scalars instead of appending an optional.
    if let scalar = UnicodeScalar(hexValue) {
        myString.append(Character(scalar))
    }
}
print(myString) // Cat‼🐱
请注意,对于UTF-8和UTF-16,转换并不总是那么容易。(请参阅UTF-8、UTF-16和UTF-32问题。)
NSString和unichar
在Swift中也可以使用 `NSString` 和 `unichar`,但要注意:除非你熟悉Objective-C并且擅长把它的语法转换成Swift,否则很难找到好的文档。
另外,`unichar` 本质上就是 `UInt16`(`NSString` 相当于一个 `UInt16` 数组),而如上所述,从 `UInt16` 到Unicode标量值的转换并不总是那么容易(例如,表情符号以及其他位于高位码平面的字符要用代理对来表示,转换时必须处理这些代理对)。
自定义字符串结构
出于上述原因,我最终没有使用以上任何一种方法。相反,我编写了自己的字符串结构,这基本上是一个UInt32
数组,用于保存Unicode标量值。
同样,对大多数人来说,这不是解决方案。如果你只需要稍微扩展String
或Character
的功能,请首先考虑使用扩展(extension)。
但是,如果您确实需要专门处理Unicode标量值,可以编写自定义结构。
优点是:
- 在进行字符串操作时,不需要频繁地在各种类型(如 `String`、`Character`、`UnicodeScalar`、`UInt32` 等)之间切换。
- Unicode操作完成后,最终转换为 `String` 很容易。
- 需要添加更多方法时很容易
- 简化从Java或其他语言转换代码的过程
缺点是:
- 使代码对其他Swift开发者来说不太可移植和易读
- 不如原生Swift类型那样经过充分的测试和优化
- 每次需要时都要包含另一个文件
您可以自己编写,但以下是我的参考代码。最困难的部分是使其可哈希。
/// A string stored as a flat array of Unicode scalar values.
///
/// Every index addresses exactly one `UInt32` scalar value, so positions,
/// lengths and ranges are plain `Int`s counted in scalars (not in grapheme
/// clusters or UTF-8/UTF-16 code units). Convert back with `toString()`.
struct ScalarString: Sequence, Hashable, CustomStringConvertible {

    /// Backing storage: one element per scalar value. `fileprivate` so the
    /// free operator functions in this file can read it.
    fileprivate var scalarArray: [UInt32] = []

    // MARK: - Initializers

    /// Creates an empty scalar string.
    init() {
    }

    /// Creates a scalar string holding the single scalar value `character`.
    init(_ character: UInt32) {
        scalarArray.append(character)
    }

    /// Creates a scalar string from an array of scalar values.
    init(_ charArray: [UInt32]) {
        scalarArray = charArray
    }

    /// Creates a scalar string from the Unicode scalars of `string`.
    init(_ string: String) {
        scalarArray = string.unicodeScalars.map { $0.value }
    }

    // MARK: - Sequence

    /// Returns an iterator over the stored scalar values, in order.
    func makeIterator() -> AnyIterator<UInt32> {
        return AnyIterator(scalarArray.makeIterator())
    }

    // MARK: - Appending

    /// Appends a single scalar value.
    mutating func append(_ scalar: UInt32) {
        scalarArray.append(scalar)
    }

    /// Appends all scalar values of another scalar string.
    mutating func append(_ scalarString: ScalarString) {
        scalarArray.append(contentsOf: scalarString.scalarArray)
    }

    /// Appends the Unicode scalars of `string`.
    mutating func append(_ string: String) {
        scalarArray.append(contentsOf: string.unicodeScalars.map { $0.value })
    }

    // MARK: - Inspection

    /// Returns the scalar value at `index`. Traps when `index` is out of bounds.
    func charAt(_ index: Int) -> UInt32 {
        return scalarArray[index]
    }

    /// Removes all scalars while keeping the allocated capacity.
    mutating func clear() {
        scalarArray.removeAll(keepingCapacity: true)
    }

    /// Returns `true` when `character` occurs anywhere in the string.
    func contains(_ character: UInt32) -> Bool {
        return scalarArray.contains(character)
    }

    /// The `String` form of the scalars (see `toString()`).
    var description: String {
        return toString()
    }

    /// Returns the last scalar value, or `nil` when the string is empty.
    func endsWith() -> UInt32? {
        return scalarArray.last
    }

    /// Returns the index of the first occurrence of `string`, or `nil` when it
    /// does not occur.
    ///
    /// An empty `string` never matches and yields `nil`.
    func indexOf(_ string: ScalarString) -> Int? {
        let needle = string.scalarArray
        guard !needle.isEmpty, needle.count <= scalarArray.count else {
            return nil
        }
        for start in 0...(scalarArray.count - needle.count) {
            if scalarArray[start..<(start + needle.count)].elementsEqual(needle) {
                return start
            }
        }
        return nil
    }

    // MARK: - Inserting

    /// Inserts a scalar value at `index`. Traps when `index` is out of bounds.
    mutating func insert(_ scalar: UInt32, atIndex index: Int) {
        scalarArray.insert(scalar, at: index)
    }

    /// Inserts all scalars of `string` starting at `index`, preserving their order.
    mutating func insert(_ string: ScalarString, atIndex index: Int) {
        // One bulk insert instead of shifting the tail once per scalar.
        scalarArray.insert(contentsOf: string.scalarArray, at: index)
    }

    /// Inserts the Unicode scalars of `string` starting at `index`, preserving their order.
    mutating func insert(_ string: String, atIndex index: Int) {
        scalarArray.insert(contentsOf: string.unicodeScalars.map { $0.value }, at: index)
    }

    // MARK: - Properties

    /// `true` when the string holds no scalars.
    var isEmpty: Bool {
        return scalarArray.isEmpty
    }

    /// A djb2-style hash (seed 5381, `hash * 33 + scalar`) using wrapping addition.
    var hashValue: Int {
        return scalarArray.reduce(5381) { hash, scalar in
            ((hash << 5) &+ hash) &+ Int(scalar)
        }
    }

    /// The number of scalar values in the string.
    var length: Int {
        return scalarArray.count
    }

    // MARK: - Removing

    /// Removes the scalar at `index`. Traps when `index` is out of bounds.
    mutating func removeCharAt(_ index: Int) {
        scalarArray.remove(at: index)
    }

    /// Returns a copy with every occurrence of `character` removed.
    func removingAllInstancesOfChar(_ character: UInt32) -> ScalarString {
        return ScalarString(scalarArray.filter { $0 != character })
    }

    /// Returns a copy without the scalars in `range`, or `nil` when `range`
    /// is not fully inside the string. The receiver is not modified.
    func removeRange(_ range: CountableRange<Int>) -> ScalarString? {
        guard range.lowerBound >= 0, range.upperBound <= scalarArray.count else {
            return nil
        }
        var remaining = scalarArray
        remaining.removeSubrange(range)
        return ScalarString(remaining)
    }

    // MARK: - Replacing

    /// Returns a copy in which every `character` is replaced by `replacementChar`.
    func replace(_ character: UInt32, withChar replacementChar: UInt32) -> ScalarString {
        return ScalarString(scalarArray.map { $0 == character ? replacementChar : $0 })
    }

    /// Returns a copy in which every `character` is replaced by the scalars of
    /// `replacementString`.
    func replace(_ character: UInt32, withString replacementString: String) -> ScalarString {
        // Convert the replacement once rather than on every match.
        let replacement = replacementString.unicodeScalars.map { $0.value }
        var result = ScalarString()
        for scalar in scalarArray {
            if scalar == character {
                result.scalarArray.append(contentsOf: replacement)
            } else {
                result.scalarArray.append(scalar)
            }
        }
        return result
    }

    /// Returns a copy in which the scalars in `range` are replaced by
    /// `replacementString`.
    ///
    /// The range is clamped to the bounds of the string. An empty range is a
    /// pure insertion at `range.lowerBound` (including at the very end); the
    /// replacement is never silently dropped.
    func replaceRange(_ range: CountableRange<Int>, withString replacementString: ScalarString) -> ScalarString {
        let lower = min(max(range.lowerBound, 0), scalarArray.count)
        let upper = min(max(range.upperBound, lower), scalarArray.count)
        var result = ScalarString(Array(scalarArray[0..<lower]))
        result.append(replacementString)
        result.scalarArray.append(contentsOf: scalarArray[upper..<scalarArray.count])
        return result
    }

    /// Replaces the whole content with the Unicode scalars of `string`.
    mutating func set(_ string: String) {
        scalarArray = string.unicodeScalars.map { $0.value }
    }

    // MARK: - Splitting and slicing

    /// Splits the string at every occurrence of `splitChar`.
    ///
    /// Empty parts are kept (adjacent, leading or trailing separators produce
    /// empty strings). An empty receiver yields an empty array.
    func split(atChar splitChar: UInt32) -> [ScalarString] {
        guard !scalarArray.isEmpty else {
            return []
        }
        return scalarArray
            .split(separator: splitChar, omittingEmptySubsequences: false)
            .map { ScalarString(Array($0)) }
    }

    /// Returns the first scalar value, or `nil` when the string is empty.
    func startsWith() -> UInt32? {
        return scalarArray.first
    }

    /// Returns the scalars from `startIndex` to the end of the string.
    func substring(_ startIndex: Int) -> ScalarString {
        return ScalarString((startIndex..<length).map { scalarArray[$0] })
    }

    /// Returns the scalars in `startIndex..<endIndex` (end exclusive).
    func substring(_ startIndex: Int, _ endIndex: Int) -> ScalarString {
        return ScalarString((startIndex..<endIndex).map { scalarArray[$0] })
    }

    // MARK: - Conversion

    /// Converts the scalars to a `String`.
    ///
    /// Values for which `UnicodeScalar(_:)` returns `nil` are skipped.
    func toString() -> String {
        var result = ""
        for value in scalarArray {
            if let validScalar = UnicodeScalar(value) {
                result.append(Character(validScalar))
            }
        }
        return result
    }

    /// Returns a copy without leading and trailing spaces, tabs and newlines
    /// (U+0020, U+0009, U+000A). No other scalars are trimmed.
    func trim() -> ScalarString {
        let space: UInt32 = 0x00000020
        let tab: UInt32 = 0x00000009
        let newline: UInt32 = 0x0000000A
        let isTrimmable: (UInt32) -> Bool = { $0 == space || $0 == tab || $0 == newline }

        // First scalar that must be kept; none means the string is all whitespace.
        var firstKept: Int? = nil
        for i in 0..<scalarArray.count where !isTrimmable(scalarArray[i]) {
            firstKept = i
            break
        }
        guard let startIndex = firstKept else {
            return ScalarString()
        }

        // Last scalar that must be kept; scanning back is guaranteed to stop
        // at `startIndex` at the latest.
        var endIndex = startIndex + 1
        for i in stride(from: scalarArray.count - 1, through: startIndex, by: -1) where !isTrimmable(scalarArray[i]) {
            endIndex = i + 1
            break
        }
        return substring(startIndex, endIndex)
    }

    /// Returns the stored scalar values as an array.
    func values() -> [UInt32] {
        return scalarArray
    }
}
/// Two scalar strings are equal when they hold the same scalar values in the
/// same order.
func ==(left: ScalarString, right: ScalarString) -> Bool {
    return left.scalarArray.elementsEqual(right.scalarArray)
}
/// Concatenates two scalar strings: the result holds the scalars of `left`
/// followed by the scalars of `right`. Neither operand is modified.
func +(left: ScalarString, right: ScalarString) -> ScalarString {
    return ScalarString(left.values() + right.values())
}
`count(string)` 返回“扩展字形簇”(extended grapheme cluster)的数量,`count(string.utf16)` 返回同一字符串所需的UTF-16码元数量(即相应的 `NSString` 或 `CFString` 的长度),而 `count(string.utf8)` 则返回UTF-8码元的数量。至于“我是否应该每次引用字符串时都写成 suForm1.utf16?”这个问题,无法一概而论,这取决于你需要这个计数来做什么。 - Martin R
……`String` 还是 `NSString` 或其他东西? - Suragch