将大量的JSON对象转换为Excel表格范围进行写入

7
我正在尝试将一个json api转换为Excel表格。我尝试了不同的解析方法,但目前使用VBA-JSON(类似于VB-JSON但解析速度更快)。到目前为止,我已经将其转换为对象。如果我没错的话,它是一个集合。然而,将对象转换为表格需要大量时间。
以下是我的代码。在我使用的这台旧机器上,HTTP > string使用9秒。解析成对象需要14秒。这些都是可以接受的,但在集合中遍历一列(25k行)的for循环需要30多秒。我需要大约8列从集合中获取数据,这将花费太长时间。在我的i5机器上也需要同样长的时间。
Dim ItemCount As Integer
Dim itemID() As Long

Function httpresp(URL As String) As String
    Dim x As Object: Set x = CreateObject("MSXML2.XMLHTTP")
    x.Open "GET", URL, False
    x.send
    httpresp = x.responseText
End Function

Private Sub btnLoad_Click()
    Application.Calculation = xlCalculationManual
    Application.ScreenUpdating = false

    Dim URL As String: URL = "https://www.gw2shinies.com/api/json/item/tp"
    Dim DecJSON As Object: Set DecJSON = JsonConverter.ParseJson(httpresp(URL))
    ItemCount = DecJSON.Count
    ReDim itemID(1 To ItemCount)
    Range("A2:S25000").Clear                'clear range
    For i = 1 To ItemCount
        Cells(i + 1, 1).Value = DecJSON(i)("item_id")
    Next i
    Application.ScreenUpdating = True
    Application.Calculation = xlCalculationAutomatic

End Sub

有没有什么方法可以更快地从大型集合对象中填充Excel表格?
我也查看了Rest to Excel library,但是在学习了几个小时后我还是不理解它......再加上我不知道即使我让它工作了,它的性能会如何。

是打错字了还是你的Application.ScreenUpdating赋值顺序有误?我认为你想先将其设置为False,然后再设置为True。 - Sobigen
哦,它出问题了。我现在已经修好了,但似乎没有提供任何显著的性能增加。 - Alf
尝试使用这种方法来解析JSON并填充二维数组,然后将该数组分配给单元格范围。 - omegastripes
1
好吧,这并不顺利。10分钟过去了,它仍在运行。我现在正在考虑制作自己的解析器。 - Alf
3个回答

11

考虑下面的例子,这是一个纯VBA JSON解析器。它非常快,但不太灵活,因此适合解析仅包含类似表格数据的简单JSON对象数组。

Option Explicit

Sub Test()
    
    Dim strJsonString As String
    Dim arrResult() As Variant
    
    ' download
    strJsonString = DownloadJson("https://www.gw2shinies.com/api/json/item/tp")
    
    ' process
    arrResult = ConvertJsonToArray(strJsonString)
    
    ' output
    Output Sheets(1), arrResult
    
End Sub

Function DownloadJson(strUrl As String) As String
    
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", strUrl
        .Send
        If .Status <> 200 Then
            Debug.Print .Status
            Exit Function
        End If
        DownloadJson = .responseText
    End With
    
End Function


Function ConvertJsonToArray(strJsonString As String) As Variant
    
    Dim strCnt As String
    Dim strMarkerQuot As String
    Dim arrUnicode() As String
    Dim arrQuots() As String
    Dim arrRows() As String
    Dim arrProps() As String
    Dim arrTokens() As String
    Dim arrHeader() As String
    Dim arrColumns() As Variant
    Dim arrColumn() As Variant
    Dim arrTable() As Variant
    Dim j As Long
    Dim i As Long
    Dim lngMaxRowIdx As Long
    Dim lngMaxColIdx As Long
    Dim lngPrevIdx As Long
    Dim lngFoundIdx As Long
    Dim arrProperty() As String
    Dim strPropName As String
    Dim strPropValue As String
    
    strCnt = Split(strJsonString, "[{")(1)
    strCnt = Split(strCnt, "}]")(0)
    
    strMarkerQuot = Mid(CreateObject("Scriptlet.TypeLib").GUID, 2, 36)
    strCnt = Replace(strCnt, "\\", "\")
    strCnt = Replace(strCnt, "\""", strMarkerQuot)
    strCnt = Replace(strCnt, "\/", "/")
    strCnt = Replace(strCnt, "\b", Chr(8))
    strCnt = Replace(strCnt, "\f", Chr(12))
    strCnt = Replace(strCnt, "\n", vbLf)
    strCnt = Replace(strCnt, "\r", vbCr)
    strCnt = Replace(strCnt, "\t", vbTab)
    
    arrUnicode = Split(strCnt, "\u")
    For i = 1 To UBound(arrUnicode)
        arrUnicode(i) = ChrW(CLng("&H" & Left(arrUnicode(i), 4))) & Mid(arrUnicode(i), 5)
    Next
    strCnt = Join(arrUnicode, "")
    
    arrQuots = Split(strCnt, """")
    ReDim arrTokens(UBound(arrQuots) \ 2)
    For i = 1 To UBound(arrQuots) Step 2
        arrTokens(i \ 2) = Replace(arrQuots(i), strMarkerQuot, """")
        arrQuots(i) = "%" & i \ 2
    Next
    
    strCnt = Join(arrQuots, "")
    strCnt = Replace(strCnt, " ", "")
    
    arrRows = Split(strCnt, "},{")
    lngMaxRowIdx = UBound(arrRows)
    For j = 0 To lngMaxRowIdx
        lngPrevIdx = -1
        arrProps = Split(arrRows(j), ",")
        For i = 0 To UBound(arrProps)
            arrProperty = Split(arrProps(i), ":")
            strPropName = arrProperty(0)
            If Left(strPropName, 1) = "%" Then strPropName = arrTokens(Mid(strPropName, 2))
            lngFoundIdx = GetArrayItemIndex(arrHeader, strPropName)
            If lngFoundIdx = -1 Then
                ReDim arrColumn(lngMaxRowIdx)
                If lngPrevIdx = -1 Then
                    ArrayAddItem arrHeader, strPropName
                    lngPrevIdx = UBound(arrHeader)
                    ArrayAddItem arrColumns, arrColumn
                Else
                    lngPrevIdx = lngPrevIdx + 1
                    ArrayInsertItem arrHeader, lngPrevIdx, strPropName
                    ArrayInsertItem arrColumns, lngPrevIdx, arrColumn
                End If
            Else
                lngPrevIdx = lngFoundIdx
            End If
            strPropValue = arrProperty(1)
            If Left(strPropValue, 1) = "%" Then strPropValue = arrTokens(Mid(strPropValue, 2))
            arrColumns(lngPrevIdx)(j) = strPropValue
        Next
    Next
    lngMaxColIdx = UBound(arrHeader)
    ReDim arrTable(lngMaxRowIdx + 1, lngMaxColIdx)
    For i = 0 To lngMaxColIdx
        arrTable(0, i) = arrHeader(i)
    Next
    For j = 0 To lngMaxRowIdx
        For i = 0 To lngMaxColIdx
            arrTable(j + 1, i) = arrColumns(i)(j)
        Next
    Next
    
    ConvertJsonToArray = arrTable
    
End Function

Sub Output(objSheet As Worksheet, arrCells() As Variant)
    
    With objSheet
        .Select
        .Range(.Cells(1, 1), Cells(UBound(arrCells, 1) + 1, UBound(arrCells, 2) + 1)).Value = arrCells
        .Columns.AutoFit
    End With
    With ActiveWindow
        .SplitColumn = 0
        .SplitRow = 1
        .FreezePanes = True
    End With
    
End Sub

Function GetArrayItemIndex(arrElements, varTest)
    For GetArrayItemIndex = 0 To SafeUBound(arrElements)
        If arrElements(GetArrayItemIndex) = varTest Then Exit Function
    Next
    GetArrayItemIndex = -1
End Function

Sub ArrayAddItem(arrElements, varElement)
    ReDim Preserve arrElements(SafeUBound(arrElements) + 1)
    arrElements(UBound(arrElements)) = varElement
End Sub

Sub ArrayInsertItem(arrElements, lngIndex, varElement)
    Dim i As Long
    ReDim Preserve arrElements(SafeUBound(arrElements) + 1)
    For i = UBound(arrElements) To lngIndex + 1 Step -1
        arrElements(i) = arrElements(i - 1)
    Next
    arrElements(i) = varElement
End Sub

Function SafeUBound(arrTest)
    On Error Resume Next
    SafeUBound = -1
    SafeUBound = UBound(arrTest)
End Function

对我而言,下载(约7 MB)需要大约5秒钟,处理需要10秒钟,输出需要1.5秒钟。生成的工作表包含23694行,包括表头:

worksheet

更新

快速的 jsJsonParser 可以帮助处理大量数据。查看 Douglas Crockford 的 VBA json2.js 实现


谢谢!与遍历集合相比,将数组转换为表格的速度非常快。 - Alf
看起来请求中的关键字 tp https://www.gw2shinies.com/api/json/item/tp 不再被支持,您可以尝试从API文档中选择另一个请求,例如 https://www.gw2shinies.com/api/json/history/19721。 - omegastripes
@omegastripes,非常感谢您的杰作。您能否提供另一个链接,因为这个对我来说无法使用? - YasserKhalil
@YasserKhalil 请尝试访问以下网址:http://web.archive.org/web/20170618161408/https://www.gw2shinies.com/api/json/history/19721 - omegastripes
我在这行代码strMarkerQuot = Mid(CreateObject("Scriptlet.TypeLib").GUID, 2, 36)处遭遇了权限被拒绝的错误。 - YasserKhalil
显示剩余2条评论

2

您是否尝试过使用vba-web工具包(由制作vba-json的同一团队)通过web服务调用?它会自动将JSON结果包装成数据对象。

然后,我创建了一个函数,将简单的类似表格的JSON转换为2D数组,然后将其粘贴到范围中。

首先,这是您可以添加到代码中的函数:

' Converts a simple JSON dictionary into an array
Function ConvertSimpleJsonToArray(data As Variant, ParamArray columnDefinitionsArray() As Variant) As Variant
    Dim NumRows, NumColumns As Long
    NumRows = data.Count
    NumColumns = UBound(columnDefinitionsArray) - LBound(columnDefinitionsArray) + 1

    Dim ResultArray() As Variant
    ReDim ResultArray(0 To NumRows, 0 To (NumColumns - 1)) 'Rows need an extra header row but columns do not

    Dim x, y As Integer

    'Column headers
    For y = LBound(columnDefinitionsArray) To UBound(columnDefinitionsArray)
        ResultArray(LBound(ResultArray), y) = columnDefinitionsArray(y)
    Next

    'Data rows
    For x = 1 To NumRows
        For y = LBound(columnDefinitionsArray) To UBound(columnDefinitionsArray)
            ResultArray(x, y) = data(x)(columnDefinitionsArray(y))
        Next
    Next

    ConvertSimpleJsonToArray = ResultArray
End Function

以下是我尝试调用您的API并将仅4个列填充到Excel的方法:

Sub Auto_Open()
    Dim FocusClient As New WebClient
    FocusClient.BaseUrl = "https://www.gw2shinies.com/api"

    ' Use GetJSON helper to execute simple request and work with response
    Dim Resource As String
    Dim Response As WebResponse

    'Create a Request and get Response
    Resource = "json/item/tp"
    Set Response = FocusClient.GetJson(Resource)

    If Response.StatusCode = WebStatusCode.Ok Then
        Dim ResultArray() As Variant

        ResultArray = ConvertSimpleJsonToArray(Response.data, "item_id", "name", "type", "subtype")

        Dim NumRows, NumColumns As Long
        NumRows = UBound(ResultArray) - LBound(ResultArray) + 1
        NumColumns = UBound(ResultArray, 2) - LBound(ResultArray, 2) + 1

        ActiveSheet.Range("a1").Resize(NumRows, NumColumns).Value = ResultArray
    Else
        Debug.Print "Error: " & Response.Content
    End If
End Sub

是的,运行需要几秒钟时间,但更可能是因为您有26000行数据。即使在Chrome中加载原始JSON也需要几秒钟,而这还包括JSON解析和加载到数组中。您可以通过在每个代码块后使用Debug.Print时间戳来对代码进行基准测试。

希望这可以帮助您!


只是基本的基准测试:
您的JSON数据集为7089kb。
将原始JSON输出到Chrome花费了8.21秒。
将9个列输出到Excel花费了1分钟。
- zemien

1
一次性写入所有值比逐个单元格写入更快。此外,您可能会遇到次要事件触发,因此禁用事件可能有助于提高性能。如果以下代码的性能仍然很差,则问题在于JsonConverter的性能。
Dim ItemCount As Integer
Dim items() As Variant

Function httpresp(URL As String) As String
    Dim x As Object: Set x = CreateObject("MSXML2.XMLHTTP")
    x.Open "GET", URL, False
    x.send
    httpresp = x.responseText
End Function

Private Sub btnLoad_Click()
    Application.Calculation = xlCalculationManual
    Application.ScreenUpdating = False
    Application.EnableEvents = False

    Dim URL As String: URL = "https://www.gw2shinies.com/api/json/item/tp"
    Dim DecJSON As Object: Set DecJSON = JsonConverter.ParseJson(httpresp(URL))
    ItemCount = DecJSON.Count
    ReDim items(1 To ItemCount, 1 To 1)
    Range("A2:S25000").Clear                'clear range
    Dim test As Variant
    For i = 1 To ItemCount
        items(i, 1) = DecJSON(i)("item_id")
        'Cells(i + 1, 1).Value = DecJSON(i)("item_id")
    Next i
    Range(Range("A2"), Range("A2").Offset(ItemCount)).Value = items

    Application.ScreenUpdating = True
    Application.Calculation = xlCalculationAutomatic
    Application.EnableEvents = True
End Sub

我也怀疑,并尝试将对象像那样加载到数组中,但性能损失出现在循环中而不是写入单元格。我想问题真的与JsonConverter的性能有关。 - Alf

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接