如何判断字体是否支持代理对Unicode字符?

8
我找到了一种确定字体是否支持某个16位Unicode字符(即单个UTF-16码元)的方法。不幸的是,这对于代理对Unicode字符并不适用,因为GetFontUnicodeRanges函数返回的WCRANGE结构只用WCHAR(16位)值来描述字符范围。
以下是我尝试做的示例:
// Window procedure of the test window (excerpt - the snippet is cut off below).
// WM_CREATE builds an Arial font once; WM_PAINT draws a short string that
// contains U+1F609, a code point above U+FFFF that UTF-16 stores as a
// surrogate pair, using that font.
LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
    // Created in WM_CREATE and reused by every WM_PAINT; 'static' keeps the
    // handle alive between calls of this procedure.
    static HFONT hFont = NULL;

    switch (message)
    {
        case WM_CREATE:
        {
            // Zero-initialised LOGFONT: only the height and face name are set,
            // every other field is left at 0.
            LOGFONT lf = {0};
            lf.lfHeight = -64;
            ::StringCchCopy(lf.lfFaceName, _countof(lf.lfFaceName), L"Arial");

            hFont = ::CreateFontIndirect(&lf);
        }
        break;

        case WM_PAINT:
        {
            PAINTSTRUCT ps;
            HDC hdc = BeginPaint(hWnd, &ps);

            RECT rcClient = {0};
            ::GetClientRect(hWnd, &rcClient);

            // Select the test font and remember the previous one so it can be
            // put back before EndPaint.
            HGDIOBJ hOldFont = ::SelectObject(hdc, hFont);

            // '>' and '<' bracket the character under test so that a missing
            // glyph between them is easy to spot.
            LPCTSTR pStr = L">\U0001F609<";
            // Length in WCHAR units, not in code points.
            int nLn = wcslen(pStr);

            // Draw at (20, 20), bounded by the client area.
            RECT rc = {20, 20, rcClient.right, rcClient.bottom};
            ::DrawText(hdc, pStr, nLn, &rc, DT_NOPREFIX | DT_SINGLELINE);

            ::SelectObject(hdc, hOldFont);

            EndPaint(hWnd, &ps);
        }
        break;

    //.... (the rest of the switch and of WndProc is not shown in this snippet)

如果我在Windows 10上运行它,我会得到这个:

enter image description here

但这是我在Windows 7上得到的:

enter image description here

那么如何判断中间字符是否不会被呈现?


PS. 我也尝试过使用文档不够清晰的 Uniscribe,并以这篇教程(https://maxradi.us/documents/uniscribe/)修改后的代码作为示例。但无论怎么做,都无法让Win10和Win7产生可区分的结果。如果这有助于回答这个问题,下面是我尝试的代码:

//Call from WM_PAINT handler
//(this fragment uses the hdc, pStr and rcClient locals of that handler)

//'str' receives the textual debugging dump written by test02()
std::wstring str;
test02(hdc, pStr, str);

//Draw the dump as a single line in a rectangle whose top edge is at y = 200
RECT rc0 = {0, 200, rcClient.right, rcClient.bottom};
::DrawText(hdc, str.c_str(), str.size(), &rc0, DT_NOPREFIX | DT_SINGLELINE);

然后:
void test02(HDC hDc, LPCTSTR pStr, std::wstring& str)
{
    //'str' = receives debugging outcome (needs to be printed on the screen)
    //
    //Itemizes 'pStr' with Uniscribe and, for every run, appends to 'str' the
    //glyph indices, logical clusters, visual attributes, advances and offsets
    //obtained with the font currently selected into 'hDc'.
    //A Uniscribe call that fails is reported in 'str' together with its
    //HRESULT instead of being dropped silently.

    //SOURCE:
    //      https://maxradi.us/documents/uniscribe/

    if(!pStr)
        return;

    HRESULT hr;
    SCRIPT_STRING_ANALYSIS ssa = {0};

    //Length in WCHAR units (a surrogate pair counts as 2)
    const int nLn = static_cast<int>(wcslen(pStr));

    //The analysis result itself is not dumped; as before, the rest of the
    //function only runs when this whole-string analysis succeeds.
    hr = ::ScriptStringAnalyse(hDc,
        pStr,
        nLn,
        1024,
        -1,
        SSA_GLYPHS,
        0, NULL, NULL, NULL, NULL, NULL, &ssa);

    if(FAILED(hr))
    {
        FormatAdd2(str, L"ScriptStringAnalyse failed hr=0x%08X ", static_cast<unsigned int>(hr));
        return;
    }

    //Not used for the dump either; kept as the same precondition as before.
    const SCRIPT_PROPERTIES **g_ppScriptProperties;
    int g_iMaxScript;

    hr = ::ScriptGetProperties(&g_ppScriptProperties, &g_iMaxScript);
    if(SUCCEEDED(hr))
    {
        const int cMaxItems = 20;
        SCRIPT_ITEM si[cMaxItems + 1];      //One extra entry, as ScriptItemize expects
        SCRIPT_ITEM *pItems = si;
        int cItems = 0;                     //Receives number of items (runs)

        SCRIPT_CONTROL scrCtrl = {0};
        SCRIPT_STATE scrState = {0};

        hr = ::ScriptItemize(pStr, nLn, cMaxItems, &scrCtrl, &scrState, pItems, &cItems);
        if(SUCCEEDED(hr))
        {
            FormatAdd2(str, L"cItems=%d: ", cItems);

            //Glyph buffer: at least the size suggested for ScriptShape
            //(1.5 * cChars + 16), which nLn * 4 alone does not reach for
            //very short strings.
            int nCntGlyphs = nLn * 4;
            const int nCntGlyphsSuggested = nLn * 3 / 2 + 16;
            if(nCntGlyphs < nCntGlyphsSuggested)
                nCntGlyphs = nCntGlyphsSuggested;

            WORD* pGlyphs = new WORD[nCntGlyphs];
            WORD* pLogClust = new WORD[nLn];
            SCRIPT_VISATTR* pSVs = new SCRIPT_VISATTR[nCntGlyphs];

            //Go through each run
            for(int i = 0; i < cItems; i++)
            {
                FormatAdd2(str, L"[%d]:", i);

                SCRIPT_CACHE sc = NULL;
                int nCntGlyphsWrtn = 0;
                int iPos = pItems[i].iCharPos;
                const WCHAR* pP = &pStr[iPos];
                //A run ends where the next one starts (or at the end of the string)
                int cChars = i + 1 < cItems ? pItems[i + 1].iCharPos - iPos : nLn - iPos;

                hr = ::ScriptShape(hDc, &sc, pP, cChars,
                    nCntGlyphs, &pItems[i].a, pGlyphs, pLogClust, pSVs, &nCntGlyphsWrtn);

                if(SUCCEEDED(hr))
                {
                    std::wstring strGlyphs;
                    for(int g = 0; g < nCntGlyphsWrtn; g++)
                    {
                        FormatAdd2(strGlyphs, L"%02X,", pGlyphs[g]);
                    }

                    std::wstring strLogClust;
                    for(int w = 0; w < cChars; w++)
                    {
                        FormatAdd2(strLogClust, L"%02X,", pLogClust[w]);
                    }

                    std::wstring strSVs;
                    for(int g = 0; g < nCntGlyphsWrtn; g++)
                    {
                        //SCRIPT_VISATTR is a 16-bit bit-field struct; copy its
                        //bits into a WORD rather than passing the struct
                        //itself through '...' to a %X conversion.
                        WORD wVisAttr = 0;
                        ::CopyMemory(&wVisAttr, &pSVs[g], sizeof(wVisAttr));
                        FormatAdd2(strSVs, L"%02X,", wVisAttr);
                    }

                    FormatAdd2(str, L"c=%d {G:%s LC:%s SV:%s} ", nCntGlyphsWrtn, strGlyphs.c_str(), strLogClust.c_str(), strSVs.c_str());


                    int* pAdvances = new int[nCntGlyphsWrtn];
                    GOFFSET* pOffsets = new GOFFSET[nCntGlyphsWrtn];
                    ABC abc = {0};

                    hr = ::ScriptPlace(hDc, &sc, pGlyphs, nCntGlyphsWrtn, pSVs, &pItems[i].a, pAdvances, pOffsets, &abc);
                    if(SUCCEEDED(hr))
                    {
                        std::wstring strAdvs;
                        for(int g = 0; g < nCntGlyphsWrtn; g++)
                        {
                            FormatAdd2(strAdvs, L"%02X,", pAdvances[g]);
                        }

                        std::wstring strOffs;
                        for(int g = 0; g < nCntGlyphsWrtn; g++)
                        {
                            FormatAdd2(strOffs, L"u=%02X v=%02X,", pOffsets[g].du, pOffsets[g].dv);
                        }


                        FormatAdd2(str, L"{a=%d,b=%d,c=%d} {A:%s OF:%s}", abc.abcA, abc.abcB, abc.abcC, strAdvs.c_str(), strOffs.c_str());
                    }
                    else
                    {
                        FormatAdd2(str, L"ScriptPlace failed hr=0x%08X ", static_cast<unsigned int>(hr));
                    }


                    delete[] pAdvances;
                    delete[] pOffsets;

                }
                else
                {
                    //The failure code is the interesting part of the dump when
                    //the selected font cannot shape this run.
                    FormatAdd2(str, L"ScriptShape failed hr=0x%08X ", static_cast<unsigned int>(hr));
                }



                //Clear cache
                hr = ::ScriptFreeCache(&sc);
                assert(SUCCEEDED(hr));
            }

            delete[] pSVs;
            delete[] pGlyphs;
            delete[] pLogClust;
        }
        else
        {
            FormatAdd2(str, L"ScriptItemize failed hr=0x%08X ", static_cast<unsigned int>(hr));
        }
    }
    else
    {
        FormatAdd2(str, L"ScriptGetProperties failed hr=0x%08X ", static_cast<unsigned int>(hr));
    }

    hr = ::ScriptStringFree(&ssa);
    assert(SUCCEEDED(hr));
}

// Appends printf-style formatted text to 'str' and returns 'str'.
//
//  str       - string that receives the formatted text at its end
//  pszFormat - printf-style format string; NULL leaves 'str' unchanged
//  ...       - arguments referenced by 'pszFormat'
//
// 'str' is also left unchanged when the format produces no characters or
// when formatting fails.
std::wstring& FormatAdd2(std::wstring& str, LPCTSTR pszFormat, ...)
{
    if(!pszFormat)
        return str;

    va_list argList;

    // First pass: only measure. A negative result signals a formatting error;
    // using it as a size would yield a zero-length buffer that is then
    // written out of bounds.
    va_start(argList, pszFormat);
    const int nChars = _vsctprintf(pszFormat, argList);
    va_end(argList);

    if(nChars <= 0)
        return str;

    // Format into a separate string (not into 'str' directly) so that
    // arguments pointing into 'str' stay valid while they are being read.
    std::wstring strFormatted(static_cast<std::wstring::size_type>(nChars), L'\0');

    // Second pass: the argument list is restarted because a va_list must not
    // be reused after another function has consumed it.
    va_start(argList, pszFormat);
    const int nWritten = _vstprintf_s(&strFormatted[0], static_cast<size_t>(nChars) + 1, pszFormat, argList);
    va_end(argList);

    if(nWritten > 0)
    {
        strFormatted.resize(static_cast<std::wstring::size_type>(nWritten));
        str.append(strFormatted);
    }

    return str;
}

编辑:我已经能够创建一个演示GUI应用程序,展示了Barmak Shemirani下面提出的解决方案。


1
你试过使用GetGlyphIndicesW吗? - Alex Guteniev
@AlexanderGutenev:是的,同样的问题。它不能处理比16位更宽的字形。 - c00000fd
1个回答

7

字符在Windows 10 Arial字体中实际上不受支持。Windows 10使用"Segoe UI Emoji"作为该特定代码点的回退字体。

所以首先我们要确定是否使用了回退字体。然后检查字形索引,看看它是否是豆腐字符(通常显示为方块符号 □)。

我们可以使用元文件来查找是否使用了字体替换。将该字体选择到HDC中。

使用ScriptGetFontProperties查找不支持的字形的值。

使用GetCharacterPlacement查找字符串的字形索引。如果字形索引与不支持的字形匹配,则该代码点被打印为豆腐


编辑:

如果您尝试打印中文字符等,则必须选择适当的字体(对于中文为SimSun)

这部分由IMLangFontLink完成。这是一种不同类型的字体替换。下面的示例将测试单个代码点(可以扩展为处理字符串)。

如果选择了Segoe UI字体,则对于中文字符,它将从 Segoe UI 切换到 SimSun

对于表情符号,它将从 Segoe UI 切换到 Segoe UI Emoji

另请参阅oldnewthing中的文章。请注意,OldNewThing中的文章不处理表情符号,它只让TextOut处理它(在Windows 10中正确处理,因此结果看起来可以接受)


#define _CRT_SECURE_NO_WARNINGS
#include <iostream>
#include <windows.h>
#include <usp10.h>
#include <AtlBase.h>
#include <AtlCom.h>
#include <mlang.h>

#pragma comment(lib, "Usp10.lib")

// Enhanced-metafile enumeration callback.
// For every EMR_EXTCREATEFONTINDIRECTW record it copies the record's LOGFONT
// into the LOGFONT whose address arrives in 'logfont', so the font of the
// last such record is what remains after the enumeration.
// Always returns 1.
int CALLBACK metafileproc(HDC, HANDLETABLE*, const ENHMETARECORD *record,
    int, LPARAM logfont)
{
    // Every other record type is passed over untouched.
    if(record->iType != EMR_EXTCREATEFONTINDIRECTW)
        return 1;

    const auto *font_record = reinterpret_cast<const EMREXTCREATEFONTINDIRECTW*>(record);
    auto *captured = reinterpret_cast<LOGFONT*>(logfont);
    *captured = font_record->elfw.elfLogFont;
    return 1;
}

// Finds the font Uniscribe falls back to when 'str' is rendered with
// 'hfont_test'.
//
// The string is "drawn" into an in-memory enhanced metafile with font
// fallback and font linking enabled; the metafile is then enumerated and the
// LOGFONT of the last font-creation record is used to create the result.
//
//  str        - text to analyse; may be NULL or empty
//  hfont_test - font the text would normally be drawn with
//
// Returns a new font created with CreateFontIndirect (the caller deletes it
// with DeleteObject). When nothing can be analysed or captured, the result is
// created from the description of 'hfont_test' itself.
HFONT GetFallbackFont(const wchar_t *str, HFONT hfont_test)
{
    // Seed the result with the tested font so a meaningful font is still
    // returned on every failure path below (instead of an all-zero LOGFONT).
    LOGFONT logfont = { 0 };
    if(hfont_test)
        GetObject(hfont_test, sizeof(logfont), &logfont);

    const int length = str ? static_cast<int>(wcslen(str)) : 0;

    //use metafile to find the fallback font
    //(an empty string cannot be analysed, so no metafile is needed for it)
    auto metafile_hdc = length > 0 ? CreateEnhMetaFile(NULL, NULL, NULL, NULL) : NULL;
    if(metafile_hdc)
    {
        auto metafile_oldfont = SelectObject(metafile_hdc, hfont_test);

        // 'ssa' is only valid after a successful analysis; it must not be
        // drawn or freed otherwise.
        SCRIPT_STRING_ANALYSIS ssa = NULL;
        const HRESULT hr = ScriptStringAnalyse(metafile_hdc, str, length,
            3 * length / 2 + 16,    // glyph buffer size recommended for this API
            -1,
            SSA_METAFILE | SSA_FALLBACK | SSA_GLYPHS | SSA_LINK,
            0, NULL, NULL, NULL, NULL, NULL, &ssa);
        if(SUCCEEDED(hr))
        {
            ScriptStringOut(ssa, 0, 0, 0, NULL, 0, 0, FALSE);
            ScriptStringFree(&ssa);
        }

        SelectObject(metafile_hdc, metafile_oldfont);

        // Closing the metafile DC yields the recorded metafile.
        auto hmetafile = CloseEnhMetaFile(metafile_hdc);
        if(hmetafile)
        {
            EnumEnhMetaFile(0, hmetafile, metafileproc, &logfont, NULL);
            DeleteEnhMetaFile(hmetafile);
        }
    }

    wprintf(L"Selecting fallback font: %s\n", logfont.lfFaceName);
    return CreateFontIndirect(&logfont);
}

//IsTofuError is meant for testing emojis.
//It takes a Unicode string, selects the fallback font that GetFallbackFont
//reports for it into 'hdc', and returns true when any glyph index of the
//string equals that font's blank, invalid or default glyph.
//The font previously selected into 'hdc' is put back before returning.
bool IsTofuError(HDC hdc, HFONT hfont_test, const wchar_t *str)
{
    //a blank in the input only produces a warning; the test still runs
    if(wcsstr(str, L" ") != NULL)
        wprintf(L"*** cannot test strings containing blank space\n");

    HFONT fallback_font = GetFallbackFont(str, hfont_test);
    HGDIOBJ previous_font = SelectObject(hdc, fallback_font);

    //ask the selected font which glyph indices mark unsupported characters
    //(a blank space is blank, and an unsupported character can be blank too)
    SCRIPT_FONTPROPERTIES props = { sizeof(props) };
    {
        SCRIPT_CACHE cache = NULL;
        ScriptGetFontProperties(hdc, &cache, &props);
        ScriptFreeCache(&cache);
    }
    wprintf(L"SCRIPT_FONTPROPERTIES:\n");
    wprintf(L" Blank: %d, Default: %d, Invalid: %d\n",
        props.wgBlank, props.wgDefault, props.wgInvalid);

    //fetch the glyph indices of the string
    const size_t char_count = wcslen(str);
    wchar_t *glyph_buffer = static_cast<wchar_t*>(calloc(char_count + 1, sizeof(wchar_t)));
    GCP_RESULTS placement = { sizeof(GCP_RESULTS) };
    placement.nGlyphs = static_cast<UINT>(char_count);
    placement.lpGlyphs = glyph_buffer;
    GetCharacterPlacement(hdc, str, static_cast<int>(char_count), 0, &placement, GCP_GLYPHSHAPE);

    //compare every glyph index with the three marker glyphs
    bool found_tofu = false;
    wprintf(L"Glyphs:");
    const wchar_t *glyph = placement.lpGlyphs;
    for(UINT remaining = placement.nGlyphs; remaining > 0; --remaining, ++glyph)
    {
        wprintf(L"%d,", static_cast<int>(*glyph));
        const bool unsupported = *glyph == props.wgBlank
            || *glyph == props.wgInvalid
            || *glyph == props.wgDefault;
        found_tofu = found_tofu || unsupported;
    }
    wprintf(L"\n");

    //release the glyph buffer, restore the DC and drop the fallback font
    free(glyph_buffer);
    SelectObject(hdc, previous_font);
    DeleteObject(fallback_font);

    if(found_tofu)
        wprintf(L"Tofu error\n\n");

    return found_tofu;
}

//get_font_link checks if there is font substitution, 
//this usually applies to Asian fonts
//Note, this function doesn't accept a unicode string
//it only takes a single code point. You can imrpove it to accept strings
bool get_font_link(const wchar_t *single_codepoint,
    HDC hdc,
    HFONT &hfont_src,
    HFONT &hfont_dst,
    CComPtr<IMLangFontLink> &ifont,
    CComPtr<IMLangCodePages> &icodepages)
{
    DWORD codepages_dst[100] = { 0 };
    LONG codepages_count = 100;

    DWORD codepages = 0;
    if(FAILED(icodepages->GetStrCodePages(single_codepoint, wcslen(single_codepoint),
        0, codepages_dst, &codepages_count)))
        return false;
    codepages = codepages_dst[0];

    if(FAILED(ifont->MapFont(hdc, codepages_dst[0], hfont_src, &hfont_dst)))
        return false;

    SelectObject(hdc, hfont_dst);
    wchar_t buf[100];
    GetTextFace(hdc, _countof(buf), buf);
    wprintf(L"get_font_link:\nSelecting a different font: %s\n", buf);
    return true;
}

int main()
{
    CoInitialize(NULL);

    {
        CComPtr<IMultiLanguage> imultilang;
        CComPtr<IMLangFontLink> ifont;
        CComPtr<IMLangCodePages> icodepages;
        if(FAILED(imultilang.CoCreateInstance(CLSID_CMultiLanguage))) return 0;
        if(FAILED(imultilang->QueryInterface(&ifont))) return 0;
        if(FAILED(imultilang->QueryInterface(&icodepages))) return 0;

        //const wchar_t *single_codepoint = L"a";
        //const wchar_t *single_codepoint = L"请";
        const wchar_t *single_codepoint = L"";

        auto hdc = GetDC(0);
        auto memdc = CreateCompatibleDC(hdc);
        auto hbitmap = CreateCompatibleBitmap(hdc, 1, 1);
        auto oldbmp = SelectObject(memdc, hbitmap);
        auto hfont_src = CreateFont(10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, L"Segoe UI");
        auto oldfont = SelectObject(hdc, hfont_src);

        HFONT hfont_dst = NULL;

        if(IsTofuError(hdc, hfont_src, single_codepoint))
        {
            if(!get_font_link(
                single_codepoint, memdc, hfont_src, hfont_dst, ifont, icodepages))
                wprintf(L"Can't find a substitution!\n");
        }

        SelectObject(memdc, oldbmp);
        SelectObject(memdc, oldfont);
        DeleteObject(hbitmap);
        DeleteDC(memdc);
        ReleaseDC(0, hdc);
        DeleteObject(hfont_src);

        if(ifont && hfont_dst)
            ifont->ReleaseFont(hfont_dst);
    }

    CoUninitialize();

    return 0;
}

输出:

对于Windows 10,IsTofu 的结果为 false

对于一些旧版本的Windows,这个值可能是true,但在WinXP中未经过测试。

使用此链接中的GetUniscribeFallbackFont函数。

注意,Windows文档将GetCharacterPlacement描述为已过时,建议使用Uniscribe函数。但我不知道在这里要使用哪种替代方法。


谢谢!这很酷,尽管我还在努力理解你在做什么。你能否在IsTofu中添加更多注释以更好地解释一下?另外需要注意的是,它不适用于WinXP——IsTofu返回false。虽然我们可以安全地假设XP不支持代理对吗?最后,如果字符不是代理对,我们可以使用更简单的API:WORD gi = 0; GetGlyphIndicesW(hdc, str, 1, &gi, GGI_MARK_NONEXISTING_GLYPHS); 然后检查非支持字形的gi == -1吗? - c00000fd
NT4只支持UCS2编码,2000版本开始支持一些代理项(surrogate)字符,而XP版本对其提供更完善的支持。了解更多内容,请参考https://learn.microsoft.com/en-us/windows/desktop/intl/surrogates-and-supplementary-characters。 - Anders
@Anders:嗯...谢谢。你有机会看一下为什么他的代码在XP下似乎无法工作吗? - c00000fd
@BarmakShemirani:好的,谢谢。现在好像可以了。这是结果 - c00000fd
1
好的,从技术上讲,只使用最后一个字体就可以正常工作。(我无法看到如何解析元文件输出,这超出了我的能力范围。)感谢您的帮助。顺便提醒,使用此代码的人要注意一点,我调整了他的最后一次编辑,修复了get_font_link()函数中的资源泄漏和缓冲区溢出漏洞,最好不要把这些问题包含进去,因为相当多的人盲目地将代码从 SO 复制到他们的商业软件中。我的调整后的代码和演示可以在这里的 Github 上找到 - c00000fd
显示剩余9条评论

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接