我需要在执行页面上的JavaScript后访问HTML文档的DOM。我有以下连接到URL并获取文档的代码。 问题是它从未获得经过JavaScript修改后的DOM
public class CustomBrowser
{
public CustomBrowser()
{
//
// TODO: Add constructor logic here
//
}
protected string _url;
string html = "";
WebBrowser browser;
public string GetWebpage(string url)
{
_url = url;
// WebBrowser is an ActiveX control that must be run in a
// single-threaded apartment so create a thread to create the
// control and generate the thumbnail
Thread thread = new Thread(new ThreadStart(GetWebPageWorker));
thread.SetApartmentState(ApartmentState.STA);
thread.Start();
thread.Join();
string s = html;
return s;
}
protected void GetWebPageWorker()
{
browser = new WebBrowser();
// browser.ClientSize = new Size(_width, _height);
browser.ScrollBarsEnabled = false;
browser.ScriptErrorsSuppressed = true;
//browser.DocumentCompleted += browser_DocumentCompleted;
browser.Navigate(_url);
// Wait for control to load page
while (browser.ReadyState != WebBrowserReadyState.Complete)
Application.DoEvents();
Thread.Sleep(5000);
var documentAsIHtmlDocument3 = (mshtml.IHTMLDocument3)browser.Document.DomDocument;
html = documentAsIHtmlDocument3.documentElement.outerHTML;
browser.Dispose();
}
}
我希望有人能够帮助我解决这个问题。
while
循环和Application.DoEvents()
或Thread.Sleep()
。 - Heretic Monkey