这段 Excel VBA 代码使用正则表达式从 HTML 文件中提取章节号。然而,该正则表达式包含一个负向后行断言(negative lookbehind,即 (?<!...)),而 VBA 使用的 VBScript.RegExp 正则引擎不支持这种语法,因此下面的模式无法运行:"(?<!tbl"")>(\d(\.\d)+)<"
' Prompts the user for one or more HTML files, extracts every section number
' of the form ">d.d(.d...)<" from each file, and lists the results on the
' active sheet starting at row 7: column B receives the section number and
' column C the name of the file it was found in.
'
' Section numbers that are immediately preceded by tbl" (i.e. tbl">1.2<) are
' ignored. VBScript.RegExp has no lookbehind support, so instead of
' (?<!tbl") the pattern uses an alternation: the first branch consumes the
' unwanted tbl">...< form without capturing anything, and the second branch
' captures the wanted number. Only matches whose capture group is non-empty
' are written out.
Sub GetAllSectionNumbers()
    Const FIRST_OUTPUT_ROW As Long = 7

    ' Clear previous results. Guard against the last used row being above the
    ' output area; otherwise "B7:C<n>" would normalise to rows n..7 and wipe
    ' whatever sits above row 7.
    Dim lastUsedRow As Long
    lastUsedRow = Cells(Rows.Count, "B").End(xlUp).Row
    If lastUsedRow >= FIRST_OUTPUT_ROW Then
        Range("B" & FIRST_OUTPUT_ROW & ":C" & lastUsedRow).ClearContents
    End If

    Dim picker As FileDialog
    Set picker = Application.FileDialog(msoFileDialogOpen)
    picker.AllowMultiSelect = True
    picker.Title = "Select HTML files"
    picker.Filters.Clear
    picker.Filters.Add "HTML files", "*.htm;*.html", 1
    ' Show returns -1 when the user confirms; anything else means cancel.
    If picker.Show <> -1 Then Exit Sub

    ' The regex does not depend on the file, so build it once.
    Dim regex As Object
    Set regex = CreateObject("VBScript.RegExp")
    ' Branch 1 (no capture group): tbl">1.2<  -> matched and discarded.
    ' Branch 2 (capture group 1):  >1.2<      -> section number captured.
    regex.Pattern = "tbl"">\d(?:\.\d)+<|>(\d(?:\.\d)+)<"
    regex.Global = True
    regex.IgnoreCase = True
    regex.MultiLine = True

    ' First free row in the output area; never above FIRST_OUTPUT_ROW so the
    ' results always land in the range that is cleared on the next run.
    Dim nextRow As Long
    nextRow = Cells(Rows.Count, "B").End(xlUp).Row + 1
    If nextRow < FIRST_OUTPUT_ROW Then nextRow = FIRST_OUTPUT_ROW

    Dim selectedFile As Variant
    Dim filePath As String
    Dim fileNameSource As String
    Dim fileContents As String
    Dim fileNumber As Integer
    Dim matches As Object
    Dim sectionNumber As Variant
    Dim i As Long

    For Each selectedFile In picker.SelectedItems
        filePath = CStr(selectedFile)
        ' Everything after the last backslash is the file name.
        fileNameSource = Mid$(filePath, InStrRev(filePath, "\") + 1)

        ' Use FreeFile rather than a hard-coded #1 so an already open file
        ' number elsewhere in the workbook cannot cause a clash.
        fileNumber = FreeFile
        Open filePath For Input As #fileNumber
        fileContents = Input$(LOF(fileNumber), fileNumber)
        Close #fileNumber

        Set matches = regex.Execute(fileContents)
        For i = 0 To matches.Count - 1
            sectionNumber = matches.Item(i).SubMatches(0)
            ' An empty capture means the discard branch (tbl">...<) matched.
            If Len(sectionNumber) > 0 Then
                Range("B" & nextRow).Value = sectionNumber
                Range("C" & nextRow).Value = fileNameSource
                nextRow = nextRow + 1
            End If
        Next i
    Next selectedFile

    MsgBox "Done!"
End Sub
有没有其他的正则表达式解决方案来处理这个问题?
tbl">\d(?:\.\d)+<|>(\d(?:\.\d)+)<
可以工作,只需获取捕获值(match.SubMatches(0))即可:第一个分支会匹配并丢弃 tbl">…< 形式的内容(此时捕获组为空,应跳过该匹配),第二个分支才会把章节号捕获到第 1 组中。 - Wiktor Stribiżew