有没有一种编程方法可以确定字体文件是否具有特定的Unicode字形?

33

我正在开发一个项目,生成的PDF文件可能包含相当复杂的数学和科学公式。正文使用Times New Roman(TNR)字体,该字体的Unicode覆盖率不错,但并不完整。我们已经建立了一套机制:对于TNR中没有字形的代码点(例如大多数“奇怪”的数学符号),改用另一种覆盖更完整的Unicode字体。但我似乎找不到一种方法来查询*.ttf文件,以判断其中是否存在给定的字形。到目前为止,我只是硬编码了一个查找表,记录哪些代码点有字形,但我更希望有一种自动化的解决方案。

我在ASP.Net下使用VB.Net进行Web系统开发,但任何编程语言/环境下的解决方案我都欢迎。

编辑:win32的解决方案看起来很好,但我需要解决的特定情况是在ASP.Net web系统中。有没有一种方法可以在不将Windows API DLL包含到我的网站中的情况下完成此操作?

6个回答

11

这里使用C#和Windows API进行尝试。

// P/Invoke declaration for GetFontUnicodeRanges, imported from gdi32.dll.
// GetUnicodeRangesForFont below calls it twice: first with lpgs = IntPtr.Zero,
// using the returned value as the number of bytes to allocate, and then again
// with the allocated buffer so that the native side can fill it in.
[DllImport("gdi32.dll")]
public static extern uint GetFontUnicodeRanges(IntPtr hdc, IntPtr lpgs);

// P/Invoke declaration for SelectObject, imported from gdi32.dll.
// GetUnicodeRangesForFont below passes a device-context handle plus a font handle,
// keeps the returned handle, and passes that handle back in a second call once
// it has finished querying the font.
[DllImport("gdi32.dll")]
public extern static IntPtr SelectObject(IntPtr hDC, IntPtr hObject);

/// <summary>
/// A run of consecutive 16-bit character codes; CheckIfCharInFont treats both
/// ends as inclusive.
/// </summary>
public struct FontRange
{
    /// <summary>First character code in the run.</summary>
    public ushort Low;

    /// <summary>Last character code in the run.</summary>
    public ushort High;
}

[DllImport("gdi32.dll")]
private static extern bool DeleteObject(IntPtr hObject);

/// <summary>
/// Asks GDI which ranges of 16-bit character codes the given font has glyphs for.
/// </summary>
/// <param name="font">The font to inspect.</param>
/// <returns>
/// One <see cref="FontRange"/> per non-empty range reported by GetFontUnicodeRanges.
/// The list is empty when GDI reports no data for the font.
/// </returns>
/// <remarks>
/// The buffer is read using the Win32 GLYPHSET layout: the range count is a 32-bit
/// value at byte offset 12, followed at offset 16 by 4-byte entries made of a
/// 16-bit first code and a 16-bit glyph count. Only 16-bit codes can be expressed,
/// so characters outside the Basic Multilingual Plane are never reported.
/// </remarks>
public List<FontRange> GetUnicodeRangesForFont(Font font)
{
    const int RangeCountOffset = 12;
    const int FirstRangeOffset = 16;
    const int RangeEntrySize = 4;

    List<FontRange> fontRanges = new List<FontRange>();
    IntPtr hFont = IntPtr.Zero;
    IntPtr glyphSet = IntPtr.Zero;

    using (Graphics g = Graphics.FromHwnd(IntPtr.Zero))
    {
        IntPtr hdc = g.GetHdc();
        try
        {
            hFont = font.ToHfont();
            IntPtr old = SelectObject(hdc, hFont);
            try
            {
                // First call: no buffer, the return value is the size needed in bytes.
                uint size = GetFontUnicodeRanges(hdc, IntPtr.Zero);
                if (size == 0)
                {
                    // GDI could not describe this font; do not read an empty buffer.
                    return fontRanges;
                }

                glyphSet = Marshal.AllocHGlobal((int)size);
                if (GetFontUnicodeRanges(hdc, glyphSet) == 0)
                {
                    return fontRanges;
                }

                int count = Marshal.ReadInt32(glyphSet, RangeCountOffset);
                for (int i = 0; i < count; i++)
                {
                    int offset = FirstRangeOffset + i * RangeEntrySize;

                    // Both fields are unsigned 16-bit values; reinterpret the signed
                    // reads explicitly so the result does not depend on the
                    // project's checked/unchecked arithmetic setting.
                    ushort low = unchecked((ushort)Marshal.ReadInt16(glyphSet, offset));
                    ushort glyphCount = unchecked((ushort)Marshal.ReadInt16(glyphSet, offset + 2));
                    if (glyphCount == 0)
                    {
                        // An empty range would otherwise wrap High around to 0xFFFF.
                        continue;
                    }

                    FontRange range = new FontRange();
                    range.Low = low;
                    range.High = (ushort)Math.Min(low + glyphCount - 1, ushort.MaxValue);
                    fontRanges.Add(range);
                }
            }
            finally
            {
                // Deselect the font before it is deleted below.
                SelectObject(hdc, old);
            }
        }
        finally
        {
            if (glyphSet != IntPtr.Zero)
            {
                Marshal.FreeHGlobal(glyphSet);
            }

            if (hFont != IntPtr.Zero)
            {
                // Font.ToHfont hands ownership of the HFONT to the caller.
                DeleteObject(hFont);
            }

            g.ReleaseHdc(hdc);
        }
    }

    return fontRanges;
}

/// <summary>
/// Reports whether the character's 16-bit code falls inside any range returned
/// by <see cref="GetUnicodeRangesForFont"/> for the given font.
/// </summary>
/// <param name="character">The character to look up.</param>
/// <param name="font">The font to query; its ranges are fetched on every call.</param>
/// <returns>true when some range contains the character, otherwise false.</returns>
public bool CheckIfCharInFont(char character, Font font)
{
    ushort code = Convert.ToUInt16(character);

    return GetUnicodeRangesForFont(font).Exists(
        candidate => candidate.Low <= code && code <= candidate.High);
}

然后,假设你有一个要检查的字符 char toCheck 和一个要测试它的字体 Font theFont...

// Example call site: the branch runs when theFont reports no range containing toCheck.
if (!CheckIfCharInFont(toCheck, theFont)) {
    // not present
}

使用VB.Net相同的代码

' P/Invoke declaration for GetFontUnicodeRanges, imported from gdi32.dll.
' GetUnicodeRangesForFont below calls it once with lpgs = IntPtr.Zero and uses
' the result as the allocation size, then calls it again with the allocated buffer.
<DllImport("gdi32.dll")> _
Public Shared Function GetFontUnicodeRanges(ByVal hds As IntPtr, ByVal lpgs As IntPtr) As UInteger
End Function  

' P/Invoke declaration for SelectObject, imported from gdi32.dll.
' GetUnicodeRangesForFont below passes a device-context handle and a font handle,
' stores the returned handle, and passes it back in a second call when done.
<DllImport("gdi32.dll")> _
Public Shared Function SelectObject(ByVal hDc As IntPtr, ByVal hObject As IntPtr) As IntPtr
End Function  

''' <summary>
''' A run of consecutive 16-bit character codes; CheckIfCharInFont treats both
''' ends as inclusive.
''' </summary>
Public Structure FontRange
    ''' <summary>First character code in the run.</summary>
    Public Low As UShort

    ''' <summary>Last character code in the run.</summary>
    Public High As UShort
End Structure

<DllImport("gdi32.dll")> _
Private Shared Function DeleteObject(ByVal hObject As IntPtr) As Boolean
End Function

''' <summary>
''' Asks GDI which ranges of 16-bit character codes the given font has glyphs for.
''' </summary>
''' <param name="font">The font to inspect.</param>
''' <returns>
''' One FontRange per non-empty range reported by GetFontUnicodeRanges. The list
''' is empty when GDI reports no data for the font.
''' </returns>
''' <remarks>
''' The buffer is read using the Win32 GLYPHSET layout: the range count is a 32-bit
''' value at byte offset 12, followed at offset 16 by 4-byte entries made of a
''' 16-bit first code and a 16-bit glyph count. Only 16-bit codes can be expressed,
''' so characters outside the Basic Multilingual Plane are never reported.
''' </remarks>
Public Function GetUnicodeRangesForFont(ByVal font As Font) As List(Of FontRange)
    Const RangeCountOffset As Integer = 12
    Const FirstRangeOffset As Integer = 16
    Const RangeEntrySize As Integer = 4

    Dim fontRanges As New List(Of FontRange)
    Dim hFont As IntPtr = IntPtr.Zero
    Dim glyphSet As IntPtr = IntPtr.Zero

    Using g As Graphics = Graphics.FromHwnd(IntPtr.Zero)
        Dim hdc As IntPtr = g.GetHdc()
        Try
            hFont = font.ToHfont()
            Dim old As IntPtr = SelectObject(hdc, hFont)
            Try
                ' First call: no buffer, the return value is the size needed in bytes.
                Dim size As UInteger = GetFontUnicodeRanges(hdc, IntPtr.Zero)
                If size = 0 Then
                    ' GDI could not describe this font; do not read an empty buffer.
                    Return fontRanges
                End If

                glyphSet = Marshal.AllocHGlobal(CInt(size))
                If GetFontUnicodeRanges(hdc, glyphSet) = 0 Then
                    Return fontRanges
                End If

                Dim count As Integer = Marshal.ReadInt32(glyphSet, RangeCountOffset)
                For i As Integer = 0 To count - 1
                    Dim offset As Integer = FirstRangeOffset + (i * RangeEntrySize)

                    ' Both fields are unsigned 16-bit values. Masking the signed read
                    ' avoids the OverflowException that an implicit Short-to-UShort
                    ' conversion raises for values above Short.MaxValue.
                    Dim low As Integer = Marshal.ReadInt16(glyphSet, offset) And &HFFFF
                    Dim glyphCount As Integer = Marshal.ReadInt16(glyphSet, offset + 2) And &HFFFF
                    If glyphCount = 0 Then
                        ' An empty range has no last code to report.
                        Continue For
                    End If

                    Dim range As FontRange = New FontRange
                    range.Low = CUShort(low)
                    range.High = CUShort(Math.Min(low + glyphCount - 1, CInt(UShort.MaxValue)))
                    fontRanges.Add(range)
                Next
            Finally
                ' Deselect the font before it is deleted below.
                SelectObject(hdc, old)
            End Try
        Finally
            If glyphSet <> IntPtr.Zero Then
                Marshal.FreeHGlobal(glyphSet)
            End If

            If hFont <> IntPtr.Zero Then
                ' Font.ToHfont hands ownership of the HFONT to the caller.
                DeleteObject(hFont)
            End If

            g.ReleaseHdc(hdc)
        End Try
    End Using

    Return fontRanges
End Function

''' <summary>
''' Reports whether the character's 16-bit code falls inside any range returned
''' by GetUnicodeRangesForFont for the given font.
''' </summary>
''' <param name="character">The character to look up.</param>
''' <param name="font">The font to query; its ranges are fetched on every call.</param>
''' <returns>True when some range contains the character, otherwise False.</returns>
Public Function CheckIfCharInFont(ByVal character As Char, ByVal font As Font) As Boolean
    Dim code As UInt16 = Convert.ToUInt16(character)

    For Each candidate As FontRange In GetUnicodeRangesForFont(font)
        If candidate.Low <= code AndAlso code <= candidate.High Then
            Return True
        End If
    Next

    Return False
End Function

如果您需要经常调用它,您可能希望缓存您获取的范围,或者将其封装在CharInFontChecker类中。 - jfs
让我感到困扰的是,测试了几种可以正确呈现字形的字体,但却显示不在该字体中。 - Michael Brown

3

Scott的回答很好。这里有另一种方法,如果只检查少量字符串(在我们的情况下每种字体只有1个字符串),可能更快。但如果您使用一种字体来检查大量文本,则可能会更慢。

    // P/Invoke declaration for CreateDC, imported from gdi32.dll with CharSet.Auto.
    // CanDisplayString below calls it with the driver name "DISPLAY" and null/zero
    // for the other arguments, and treats an IntPtr.Zero result as "no DC available".
    [DllImport("gdi32.dll", EntryPoint = "CreateDC", CharSet = CharSet.Auto, SetLastError = true)]
    private static extern IntPtr CreateDC(string lpszDriver, string lpszDeviceName, string lpszOutput, IntPtr devMode);

    // P/Invoke declaration for DeleteDC, imported from gdi32.dll.
    // CanDisplayString below passes it the handle it obtained from CreateDC.
    [DllImport("gdi32.dll", ExactSpelling = true, SetLastError = true)]
    private static extern bool DeleteDC(IntPtr hdc);

    // P/Invoke declaration for SelectObject, imported from Gdi32.dll.
    // CanDisplayString below uses it to select a font handle into the device context.
    [DllImport("Gdi32.dll")]
    private static extern IntPtr SelectObject(IntPtr hdc, IntPtr hgdiobj);

    // P/Invoke declaration for GetGlyphIndices, imported from Gdi32.dll as Unicode.
    // CanDisplayString below passes the text, its length, an Int16 array of the
    // same length to receive one value per character, and a flags value in fl.
    [DllImport("Gdi32.dll", CharSet = CharSet.Unicode)]
    private static extern int GetGlyphIndices(IntPtr hdc, [MarshalAs(UnmanagedType.LPWStr)] string lpstr, int c,
                                              Int16[] pgi, int fl);

    [DllImport("gdi32.dll", ExactSpelling = true)]
    private static extern bool DeleteObject(IntPtr hObject);

    /// <summary>
    /// Returns true if the passed in string can be displayed using the passed in fontname. It checks the font to
    /// see if it has glyphs for all the chars in the string.
    /// </summary>
    /// <param name="fontName">The name of the font to check.</param>
    /// <param name="text">The text to check for glyphs of.</param>
    /// <returns>
    /// false when at least one character of <paramref name="text"/> has no glyph in the font;
    /// true otherwise, including when the text is null or empty, when no display DC can be
    /// created, or when an exception occurs (the check is deliberately best-effort).
    /// </returns>
    public static bool CanDisplayString(string fontName, string text)
    {
        // GGI_MARK_NONEXISTING_GLYPHS: the only documented flag for GetGlyphIndices.
        const int MarkNonexistingGlyphs = 0x0001;

        // With that flag GDI reports a missing glyph as 0xFFFF, which is -1 in the Int16 array.
        const Int16 NonexistingGlyph = unchecked((Int16)0xFFFF);

        try
        {
            if (string.IsNullOrEmpty(text))
            {
                // Nothing to look up; same answer the glyph loop would give for zero characters.
                return true;
            }

            IntPtr hdc = CreateDC("DISPLAY", null, null, IntPtr.Zero);
            if (hdc == IntPtr.Zero)
            {
                return true;
            }

            try
            {
                using (Font font = new Font(new FontFamily(fontName), 12, FontStyle.Regular, GraphicsUnit.Point))
                {
                    // Font.ToHfont hands ownership of the HFONT to the caller, so it is deleted below.
                    IntPtr hFont = font.ToHfont();
                    try
                    {
                        IntPtr previous = SelectObject(hdc, hFont);
                        try
                        {
                            int count = text.Length;
                            Int16[] rtcode = new Int16[count];
                            GetGlyphIndices(hdc, text, count, rtcode, MarkNonexistingGlyphs);

                            foreach (Int16 code in rtcode)
                            {
                                // 0 is kept as "missing" for compatibility with the previous check;
                                // it is also what the array holds if the call wrote nothing.
                                if (code == NonexistingGlyph || code == 0)
                                {
                                    return false;
                                }
                            }
                        }
                        finally
                        {
                            // Deselect the font so it is not in use when it is deleted.
                            SelectObject(hdc, previous);
                        }
                    }
                    finally
                    {
                        DeleteObject(hFont);
                    }
                }
            }
            finally
            {
                DeleteDC(hdc);
            }
        }
        catch (Exception)
        {
            // nada - return true
            Trap.trap();
        }
        return true;
    }

GetGlyphIndices函数中传递的0xffff值是什么? 微软好像不再有相关文档记录了,他们只提到了GGI_MARK_NONEXISTING_GLYPHS它的值为0x0001。(这里是ANSI版本,但Unicode版本在这方面没有区别。) - Armen Michaeli

1

FreeType是一个可以读取TrueType字体文件(以及其他格式)并可用于查询特定字形的库。但是,FreeType是为渲染而设计的,因此使用它可能会导致您引入比解决方案所需的更多代码。

不幸的是,在OpenType / TrueType字体世界中,甚至没有一个清晰的解决方案;字符到字形映射根据字体类型和最初设计用于哪个平台而有大约十几种不同的定义。您可以尝试查看Microsoft在OpenType规范上的cmap表定义,但这并不是很容易阅读。


0

Scott Nichols发布的代码非常好,除了一个bug:如果字形ID大于Int16.MaxValue,则会抛出OverflowException异常。为了解决这个问题,我添加了以下函数:

''' <summary>
''' Reinterprets a signed 16-bit value as the unsigned 16-bit value with the same bits.
''' </summary>
''' <param name="Input">The signed value, for example one read with Marshal.ReadInt16.</param>
''' <returns>Input itself when it is non-negative, otherwise Input + 65536.</returns>
Protected Function Unsign(ByVal Input As Int16) As UInt16
    ' Widening to Integer sign-extends; the mask then keeps only the low 16 bits.
    Return CUShort(CInt(Input) And &HFFFF)
End Function

然后将函数GetUnicodeRangesForFont中的主for循环更改为以下内容:

For i As Integer = 0 To count - 1
    Dim offset As Integer = 16 + (i * 4)

    ' Convert the glyph count to unsigned BEFORE subtracting 1: subtracting first
    ' overflows Int16 when the raw value is -32768, and turns a count of 0 into 65535.
    Dim glyphCount As Integer = Unsign(Marshal.ReadInt16(glyphSet, offset + 2))
    If glyphCount = 0 Then
        ' An empty range has no last code to report.
        Continue For
    End If

    Dim range As FontRange = New FontRange
    range.Low = Unsign(Marshal.ReadInt16(glyphSet, offset))
    ' Do the arithmetic as Integer and clamp so a malformed entry cannot overflow UInt16.
    range.High = CUShort(Math.Min(CInt(range.Low) + glyphCount - 1, CInt(UShort.MaxValue)))
    fontRanges.Add(range)
Next

0

这篇微软知识库文章可能会有所帮助: http://support.microsoft.com/kb/241020

虽然它有点过时(最初是为Windows 95编写的),但一般原则仍然适用。示例代码是C++,但由于它只是调用标准的Windows API,因此在.NET语言中也很可能可以使用,只需要稍加努力。

-编辑- 看起来旧的95时代API已经被Microsoft称为“Uniscribe”的新API所取代,它应该能够做到你需要的。


3
相反,UniScribe使得OP想要做的事情更加困难,因为UniScribe旨在使查找字形的过程透明化。例如,UniScribe将使用字体回退来选择实际包含缺失字形的不同字体。 - bzlm

0
我只使用了VB.Net单元测试,没有WIN32 API调用。它包括检查特定字符U+2026(省略号)和U+2409(HTab)的代码,并返回具有字形的字符数量(以及低值和高值)。我只对等宽字体感兴趣,但很容易更改...
    ' Unit-test body: walks every installed font family, keeps those where "M" and "i"
    ' measure the same width, and for each of them reports glyph coverage taken from the
    ' WPF GlyphTypeface.CharacterToGlyphMap, then draws a sample string into a bitmap file.
    Dim fnt As System.Drawing.Font, size_M As Drawing.Size, size_i As Drawing.Size, size_HTab As Drawing.Size, isMonospace As Boolean
    Dim ifc = New Drawing.Text.InstalledFontCollection
    ' One 640x64 bitmap and its Graphics are reused for every font.
    Dim bm As Drawing.Bitmap = New Drawing.Bitmap(640, 64), gr = Drawing.Graphics.FromImage(bm)
    Dim tf As Windows.Media.Typeface, gtf As Windows.Media.GlyphTypeface = Nothing, ok As Boolean, gtfName = ""

    For Each item In ifc.Families
        'TestContext_WriteTimedLine($"N={item.Name}.")
        fnt = New Drawing.Font(item.Name, 24.0)
        Assert.IsNotNull(fnt)

        tf = New Windows.Media.Typeface(item.Name)
        Assert.IsNotNull(tf, $"item.Name={item.Name}")

        size_M = System.Windows.Forms.TextRenderer.MeasureText("M", fnt)
        size_i = System.Windows.Forms.TextRenderer.MeasureText("i", fnt)
        ' size_HTab is measured here but not read again in this snippet.
        size_HTab = System.Windows.Forms.TextRenderer.MeasureText(ChrW(&H2409), fnt)
        ' The font counts as monospace when "M" and "i" have equal measured widths.
        isMonospace = size_M.Width = size_i.Width
        Assert.AreEqual(size_M.Height, size_i.Height, $"fnt={fnt.Name}")

        If isMonospace Then

            ' "-" stays in the report when no GlyphTypeface can be obtained.
            gtfName = "-"
            ok = tf.TryGetGlyphTypeface(gtf)
            If ok Then
                Assert.AreEqual(True, ok, $"item.Name={item.Name}")
                Assert.IsNotNull(gtf, $"item.Name={item.Name}")
                gtfName = $"{gtf.FamilyNames(Globalization.CultureInfo.CurrentUICulture)}"

                Assert.AreEqual(True, gtf.CharacterToGlyphMap().ContainsKey(AscW("M")), $"item.Name={item.Name}")
                Assert.AreEqual(True, gtf.CharacterToGlyphMap().ContainsKey(AscW("i")), $"item.Name={item.Name}")

                ' Scan codes 0..&HFFFF: t counts the codes present in the map,
                ' nMin / nMax track the lowest and highest code found.
                Dim t = 0, nMin = &HFFFF, nMax = 0
                For n = 0 To &HFFFF
                    If gtf.CharacterToGlyphMap().ContainsKey(n) Then
                        If n < nMin Then nMin = n
                        If n > nMax Then nMax = n
                        t += 1
                    End If
                Next
                gtfName &= $",[x{nMin:X}-x{nMax:X}]#{t}"

                ' Append a marker for each of the two specific characters that has a map entry.
                ok = gtf.CharacterToGlyphMap().ContainsKey(AscW(ChrW(&H2409)))
                If ok Then
                    gtfName &= ",U+2409"
                End If
                ok = gtf.CharacterToGlyphMap().ContainsKey(AscW(ChrW(&H2026)))
                If ok Then
                    gtfName &= ",U+2026"
                End If
            End If

            ' isMonospace is always True inside this branch, so the line always starts with "*M*".
            Debug.WriteLine($"{IIf(isMonospace, "*M*", "")} N={fnt.Name}, gtf={gtfName}.")
            gr.Clear(Drawing.Color.White)
            gr.DrawString($"Mi{ChrW(&H2409)} {fnt.Name}", fnt, New Drawing.SolidBrush(Drawing.Color.Black), 10, 10)
            ' The file name is relative, so the image is written to the current directory.
            bm.Save($"{fnt.Name}_MiHT.bmp")
        End If
    Next

输出结果为

*M* N=Consolas, gtf=Consolas,[x0-xFFFC]#2488,U+2026.

*M* N=Courier New, gtf=Courier New,[x20-xFFFC]#3177,U+2026.

*M* N=Lucida Console, gtf=Lucida Console,[x20-xFB02]#644,U+2026.

*M* N=Lucida Sans Typewriter, gtf=Lucida Sans Typewriter,[x20-xF002]#240,U+2026.

*M* N=MingLiU-ExtB, gtf=MingLiU-ExtB,[x0-x2122]#212.

*M* N=MingLiU_HKSCS-ExtB, gtf=MingLiU_HKSCS-ExtB,[x0-x2122]#212.

*M* N=MS Gothic, gtf=MS Gothic,[x0-xFFEE]#15760,U+2026.

*M* N=NSimSun, gtf=NSimSun,[x20-xFFE5]#28737,U+2026.

*M* N=OCR A Extended, gtf=OCR A Extended,[x20-xF003]#248,U+2026.

*M* N=SimSun, gtf=SimSun,[x20-xFFE5]#28737,U+2026.

*M* N=SimSun-ExtB, gtf=SimSun-ExtB,[x20-x7F]#96.

*M* N=Webdings, gtf=Webdings,[x20-xF0FF]#446.


网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接