也许以下脚本(我整个项目的一部分)会有所帮助。该脚本定义了一个相当复杂的 Get-CharInfo 函数。
示例:'r Ř',0x1F44D|chr -OutUni -OutHex -OutStr -IgnoreWhiteSpace
r Ř👍
0x0072,0x0020,0x0158,0x0001F44D
\u0072\u0020\u0158\U0001F44D
Char CodePoint Category Description
---- --------- -------- -----------
r {U+0072, 0x72} LowercaseLetter Latin Small Letter R
Ř {U+0158, 0xC5,0x98} UppercaseLetter Latin Capital Letter R With Caron
👍 {U+1F44D, 0xF0,0x9F,0x91,0x8D} So THUMBS UP SIGN (0xd83d,0xdc4d)
# ↑ UTF-8 ↑ name ↑ surrogates
代码(函数体末尾的基于注释的帮助):
# Define the helper type [Microsofts.CharMap.UName] unless it already exists
# in this session. Its static Get(char) method returns the name that
# getuname.dll reports for a UTF-16 code unit, or the placeholder "???" when
# that DLL is not found in the system directory.
if ( $null -eq ('Microsofts.CharMap.UName' -as [type]) ) {
    # Child scope: the helper variables below do not outlive this block.
    & {
        # Every backslash is doubled because the path ends up inside a
        # regular (escaped) C# string literal.
        $sysDirEscaped = [System.Environment]::SystemDirectory -replace '\\', '\\'
        $unameDll      = "${sysDirEscaped}\\getuname.dll"

        if ( Test-Path -LiteralPath $unameDll -PathType Leaf ) {
            # P/Invoke wrapper around GetUName; the here-string must stay
            # at column 0 so that its terminator is recognised.
            $memberSource = @"
[DllImport("${unameDll}", ExactSpelling=true, SetLastError=true)]
private static extern int GetUName(ushort wCharCode,
[MarshalAs(UnmanagedType.LPWStr)] System.Text.StringBuilder buf);
public static string Get(char ch) {
var sb = new System.Text.StringBuilder(300);
UName.GetUName(ch, sb);
return sb.ToString();
}
"@
        } else {
            # No getuname.dll: provide a stub with the same signature.
            $memberSource = 'public static string Get(char ch) { return "???"; }'
        }

        Add-Type -Name UName -Namespace Microsofts.CharMap -MemberDefinition $memberSource
    }
}
function Get-CharInfo {
    <#
    .SYNOPSIS
        Describes every character (Unicode code point) found in the input.
    .DESCRIPTION
        For each character the function emits one object with the properties
        Char, CodePoint (a 'U+XXXX' label followed by the UTF-8 bytes in hex),
        Category and Description.

        Descriptions of BMP characters come from [Microsofts.CharMap.UName];
        when that yields no name ('Undefined', or '???' from the stub type),
        and always for supplementary characters, a built-in table of code
        point ranges and then the file given by -UnicodeData are consulted.

        Optionally the whole input is echoed to the host as a plain string,
        as a list of hex code points and as \u / \U escape sequences.
    .PARAMETER InputObject
        A character, an integer code point or a string; accepted from the
        pipeline. Strings are walked character by character and a valid
        surrogate pair is reported as a single code point.
    .PARAMETER OutUni
        In the end block, write the collected \uXXXX / \UXXXXXXXX escapes
        to the host (yellow).
    .PARAMETER OutHex
        In the end block, write the collected hex code points, comma
        separated, to the host (cyan).
    .PARAMETER OutStr
        In the end block, write the collected characters as one string to
        the host (magenta).
    .PARAMETER IgnoreWhiteSpace
        Do not emit an output object for characters matching the regex '\s'.
        Such characters are still part of the -OutStr/-OutHex/-OutUni text.
    .PARAMETER UnicodeData
        Path of a semicolon-separated text file whose lines start with the
        code point in hex, followed by the name and the category (the layout
        of UnicodeData.txt). An empty value disables the lookup.
    .NOTES
        Passing -Verbose has the same effect as -OutStr -OutHex -OutUni.
    #>
    [CmdletBinding()]
    [OutputType([System.Management.Automation.PSCustomObject],
                [System.Array])]
    param(
        [Parameter(Position=0, Mandatory, ValueFromPipeline)]
        $InputObject,
        [Parameter()]
        [switch]$OutUni,
        [Parameter()]
        [switch]$OutHex,
        [Parameter()]
        [switch]$OutStr,
        [Parameter()]
        [switch]$IgnoreWhiteSpace,
        [Parameter()]
        [string]$UnicodeData = 'D:\Utils\CodePages\UnicodeData.txt'
    )
    begin {
        Set-StrictMode -Version latest
        # '::' stands in for "no file": Test-Path is expected to report it
        # as missing, so ReadUnicodeData never tries to load anything.
        if ( [string]::IsNullOrEmpty( $UnicodeData) ) { $UnicodeData = '::' }

        # Looks $ch up in a built-in table of code point ranges and, on a
        # hit, overwrites Category/Description of $out.
        # NOTE: $ch, $out and $nil are not parameters; they are resolved
        # dynamically from the scope of the calling 'out' function.
        Function ReadUnicodeRanges {
            if ($Script:UnicodeFirstLast.Count -eq 0) {
                # Parse the table once per invocation of Get-CharInfo.
                $Script:UnicodeFirstLast = @'
First,Last,Category,Description
128,128,Cc-Control,Padding Character
129,129,Cc-Control,High Octet Preset
132,132,Cc-Control,Index
153,153,Cc-Control,Single Graphic Character Introducer
13312,19903,Lo-Other_Letter,CJK Ideograph Extension A
19968,40956,Lo-Other_Letter,CJK Ideograph
44032,55203,Lo-Other_Letter,Hangul Syllable
94208,100343,Lo-Other_Letter,Tangut Ideograph
101632,101640,Lo-Other_Letter,Tangut Ideograph Supplement
131072,173789,Lo-Other_Letter,CJK Ideograph Extension B
173824,177972,Lo-Other_Letter,CJK Ideograph Extension C
177984,178205,Lo-Other_Letter,CJK Ideograph Extension D
178208,183969,Lo-Other_Letter,CJK Ideograph Extension E
183984,191456,Lo-Other_Letter,CJK Ideograph Extension F
196608,201546,Lo-Other_Letter,CJK Ideograph Extension G
983040,1048573,Co-Private_Use,Plane 15 Private Use
1048576,1114109,Co-Private_Use,Plane 16 Private Use
'@ | ConvertFrom-Csv -Delimiter ',' |
                    ForEach-Object {
                        # Cast the bounds to [int] so the range test below
                        # compares numbers, not strings.
                        [PSCustomObject]@{
                            First      = [int]$_.First
                            Last       = [int]$_.Last
                            Category   = $_.Category
                            Description= $_.Description
                        }
                    }
            }
            foreach ( $FirstLast in $Script:UnicodeFirstLast) {
                if ( $FirstLast.First -le $ch -and $ch -le $FirstLast.Last ) {
                    $out.Category = $FirstLast.Category
                    $out.Description = $FirstLast.Description + $nil
                    break
                }
            }
        }

        # Accumulators for the -OutHex / -OutStr / -OutUni summaries that
        # the end block prints.
        $AuxHex = [System.Collections.ArrayList]::new()
        $AuxStr = [System.Collections.ArrayList]::new()
        $AuxUni = [System.Collections.ArrayList]::new()
        # Caches, reset at the start of every Get-CharInfo invocation.
        $Script:UnicodeFirstLast = @()
        $Script:UnicodeDataLines = @()

        # Looks $ch up in the -UnicodeData file (loaded on first use) and,
        # on a hit, overwrites Category/Description of $out.
        # Uses $ch, $out and $nil from the calling 'out' function.
        function ReadUnicodeData {
            if ( $Script:UnicodeDataLines.Count -eq 0 -and
                 (Test-Path -LiteralPath $UnicodeData -PathType Leaf) ) {
                # Convert-Path resolves the path against the PowerShell
                # location; .NET would resolve a relative path against the
                # process working directory, which may be a different one.
                $Script:UnicodeDataLines = @([System.IO.File]::ReadAllLines(
                    (Convert-Path -LiteralPath $UnicodeData),
                    [System.Text.Encoding]::UTF8))
            }
            # Lines start with the code point as (at least) four hex digits.
            $DescrLine = $Script:UnicodeDataLines -match ('^{0:X4}\;' -f $ch)
            if ( $DescrLine.Count -gt 0) {
                $u0, $Descr, $Categ, $u3 = $DescrLine[0] -split ';'
                $out.Category = $Categ
                $out.Description = $Descr + $nil
            }
        }

        # Builds and emits the info object for one code point.
        #   $ch  - the code point as a number
        #   $nil - text appended to the description (the process block
        #          passes the surrogate pair annotation, or '')
        function out {
            param(
                [Parameter(Position=0, Mandatory=$true )] $ch,
                [Parameter(Position=1, Mandatory=$false)]$nil=''
            )
            if (0 -le $ch -and 0xFFFF -ge $ch) {
                # Basic Multilingual Plane: one UTF-16 code unit.
                [void]$AuxHex.Add('0x{0:X4}' -f $ch)
                $s = [char]$ch
                [void]$AuxStr.Add($s)
                [void]$AuxUni.Add('\u{0:X4}' -f $ch)
                $out = [pscustomobject]@{
                    Char        = $s
                    CodePoint   = ('U+{0:X4}' -f $ch),
                        (([System.Text.Encoding]::UTF8.GetBytes($s) |
                            ForEach-Object { '0x{0:X2}' -f $_ }) -join ',')
                    Category    = [System.Globalization.CharUnicodeInfo]::GetUnicodeCategory($ch)
                    Description = [Microsofts.CharMap.UName]::Get($ch)
                }
                # 'Undefined' is what the DLL-backed lookup is expected to
                # return for an unnamed character; '???' is what the stub
                # type returns when getuname.dll is unavailable. Either way
                # fall back to the range table, then to the data file.
                if ( $out.Description -in 'Undefined','???' ) { ReadUnicodeRanges }
                if ( $out.Description -in 'Undefined','???' ) { ReadUnicodeData }
            } elseif (0x10000 -le $ch -and 0x10FFFF -ge $ch) {
                # Supplementary planes: [Microsofts.CharMap.UName]::Get takes
                # a single [char], so the name can only come from the
                # fallbacks.
                [void]$AuxHex.Add('0x{0:X8}' -f $ch)
                $s = [char]::ConvertFromUtf32($ch)
                [void]$AuxStr.Add($s)
                [void]$AuxUni.Add('\U{0:X8}' -f $ch)
                $out = [pscustomobject]@{
                    Char        = $s
                    CodePoint   = ('U+{0:X}' -f $ch),
                        (([System.Text.Encoding]::UTF8.GetBytes($s) |
                            ForEach-Object { '0x{0:X2}' -f $_ }) -join ',')
                    Category    = [System.Globalization.CharUnicodeInfo]::GetUnicodeCategory($s, 0)
                    Description = '???' + $nil
                }
                ReadUnicodeRanges
                if ( $out.Description -eq ('???' + $nil) ) { ReadUnicodeData }
            } else {
                Write-Warning ('Character U+{0:X4} is out of range' -f $ch)
                $s = $null
            }
            # Emit nothing for out-of-range values, nor for white space
            # when -IgnoreWhiteSpace was given.
            if (( $null -eq $s ) -or
                ( $IgnoreWhiteSpace.IsPresent -and ( $s -match '\s' ))
               ) {
            } else {
                $out
            }
        }
    }
    process {
        # A number above the BMP becomes its UTF-16 string form, so the
        # string branch below reports it together with its surrogates.
        if ( ($InputObject -as [int]) -gt 0xFFFF -and
             ($InputObject -as [int]) -le 0x10ffff ) {
            $InputObject = [string][char]::ConvertFromUtf32($InputObject)
        }
        if ($null -cne ($InputObject -as [char])) {
            # Anything PowerShell can convert to a single [char].
            out $([int][char]$InputObject) ''
        } elseif ( $InputObject -isnot [string] -and
                   $null -cne ($InputObject -as [int])) {
            # Any other number; 'out' warns when it is out of range.
            out $([int]$InputObject) ''
        } else {
            $InputObject = [string]$InputObject
            for ($i = 0; $i -lt $InputObject.Length; ++$i) {
                if ( [char]::IsHighSurrogate($InputObject[$i]) -and
                     (1+$i) -lt $InputObject.Length -and
                     [char]::IsLowSurrogate($InputObject[$i+1])) {
                    # Valid surrogate pair: report one code point and
                    # append both surrogates to its description.
                    $aux = ' (0x{0:x4},0x{1:x4})' -f [int]$InputObject[$i],
                                                   [int]$InputObject[$i+1]
                    out $([char]::ConvertToUtf32($InputObject[$i], $InputObject[1+$i])) $aux
                    $i++
                } else {
                    out $([int][char]$InputObject[$i]) ''
                }
            }
        }
    }
    end {
        # Summaries go to the host, not to the output stream.
        if ( $OutStr.IsPresent -or $PSBoundParameters['Verbose']) {
            Write-Host -ForegroundColor Magenta -Object $($AuxStr -join '')
        }
        if ( $OutHex.IsPresent -or $PSBoundParameters['Verbose']) {
            Write-Host -ForegroundColor Cyan -Object $($AuxHex -join ',')
        }
        if ( $OutUni.IsPresent -or $PSBoundParameters['Verbose']) {
            Write-Host -ForegroundColor Yellow -Object $($AuxUni -join '')
        }
    }
}
# Short alias: lets the function be invoked as 'chr'.
Set-Alias -Name chr -Value Get-CharInfo
Get-CharInfo:未将术语'Get-CharInfo'识别为 cmdlet 名称。 - Clamarc
需要先加载脚本(例如保存为 get_charinfo.ps1,使用点源操作符 . 激活)。 - JosefZ
. Get-CharInfo.ps1。 - JosefZ