在C#中验证HTML5

7
我们目前正在使用C#构建一个全新的应用程序。我们有广泛的UI测试,使用Selenium Web Driver。这些测试(以及单元测试)由我们的CI服务器运行。
Selenium公开了.PageSource属性,将该源通过HTML5验证器运行作为每个UI测试的另一部分是有意义的(对我来说)。
我想捕捉与http://validator.w3.org/相同类型的问题。作为附加目标,我还想检查Section 508(无障碍访问)合规性问题。
我的问题是,我找不到任何可以在本地轻松集成到我的UI测试中的东西。W3C网站公开了SOAP API,但我不想在CI过程中访问他们的网站。他们似乎也不支持获取SOAP响应。我想避免在本地安装完整的W3C服务器。
我看到的最接近的东西是http://www.totalvalidator.com/,使用它需要编写临时文件并解析报告。
我想在走这条路之前看看是否有其他方法。最好是可以调用的DotNet程序集。

c


W3C验证服务是开源的:http://validator.w3.org/source/ - Arran
3个回答

1
在花费整个周末解决这个问题后,我唯一能想到的解决方案是一个商业库,称为CSE HTML验证器。
它可以在这里找到:http://www.htmlvalidator.com/htmldownload.html 。我为它编写了一个简单的包装器,代码如下。
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;

[assembly: CLSCompliant(true)]
namespace HtmlValidator
{

/// <summary>
/// Validates an HTML string by writing it to a temporary file, running the
/// CSE HTML Validator command-line processor over that file and parsing the
/// JSON report the tool prints to standard output.
/// </summary>
public class Validator
{
    #region Constructors...

    /// <summary>
    /// Creates a validator for the given markup. Nothing is executed until
    /// <see cref="ValidateHtmlFile"/> is called.
    /// </summary>
    /// <param name="htmlToValidate">The HTML source to validate.</param>
    public Validator(string htmlToValidate)
    {
        HtmlToValidate = htmlToValidate;
        HasExecuted = false;
        Errors = new List<ValidationResult>();
        Warnings = new List<ValidationResult>();
        OtherMessages = new List<ValidationResult>();
    }

    #endregion



    #region Properties...
    /// <summary>Messages whose type is "ERROR".</summary>
    public IList<ValidationResult> Errors { get; private set; }

    /// <summary>True once <see cref="ValidateHtmlFile"/> has completed successfully.</summary>
    public bool HasExecuted { get; private set; }

    /// <summary>The markup passed to the constructor.</summary>
    public string HtmlToValidate { get; private set; }

    /// <summary>Messages that are neither "ERROR" nor "WARNING".</summary>
    public IList<ValidationResult> OtherMessages { get; private set; }

    /// <summary>The raw text the validator process wrote to standard output.</summary>
    public string ResultsString { get; private set; }

    /// <summary>Path of the temporary file used for the most recent run (deleted after the run).</summary>
    public string TempFilePath { get; private set; }

    /// <summary>Messages whose type is "WARNING".</summary>
    public IList<ValidationResult> Warnings { get; private set; }
    #endregion



    #region Public methods...
    /// <summary>
    /// Runs the external validator over <see cref="HtmlToValidate"/> and fills
    /// <see cref="Errors"/>, <see cref="Warnings"/> and <see cref="OtherMessages"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The validator process produced no output to parse.
    /// </exception>
    public void ValidateHtmlFile()
    {
        try
        {
            WriteTempFile();
            ExecuteValidator();
        }
        finally
        {
            // Clean up even when the validator could not be started or crashed,
            // so failed runs do not leave files behind in the temp directory.
            DeleteTempFile();
        }

        ParseResults();

        HasExecuted = true;
    }

    #endregion



    #region Private methods...
    /// <summary>
    /// Deletes the temporary file created by <see cref="WriteTempFile"/>.
    /// </summary>
    private void DeleteTempFile()
    {
        // Delete the file that was actually written. Asking for a fresh temp
        // file name here would create a second file and leave the first behind.
        if (!String.IsNullOrEmpty(TempFilePath))
        {
            File.Delete(TempFilePath);
        }
    }


    /// <summary>
    /// Starts the command-line validator on <see cref="TempFilePath"/> and
    /// captures everything it writes to standard output in <see cref="ResultsString"/>.
    /// </summary>
    private void ExecuteValidator()
    {
        var psi = new ProcessStartInfo(GetHTMLValidatorPath())
        {
            RedirectStandardInput = false,
            RedirectStandardOutput = true,
            RedirectStandardError = false,
            UseShellExecute = false,
            Arguments = String.Format(@"-e,(stdout),0,16 ""{0}""", TempFilePath)
        };

        using (var p = new Process { StartInfo = psi })
        {
            p.Start();

            // Drain stdout BEFORE waiting for exit: waiting first can deadlock
            // when the child blocks on a full output pipe.
            ResultsString = p.StandardOutput.ReadToEnd();
            p.WaitForExit();
        }
    }


    /// <summary>Returns the install location of the validator's command-line processor.</summary>
    private static string GetHTMLValidatorPath()
    {
        return @"C:\Program Files (x86)\HTMLValidator120\cmdlineprocessor.exe";
    }


    /// <summary>
    /// Deserializes <see cref="ResultsString"/> and sorts each reported message
    /// into <see cref="Errors"/>, <see cref="Warnings"/> or <see cref="OtherMessages"/>
    /// according to its message type.
    /// </summary>
    private void ParseResults()
    {
        // Start from a clean slate so a second call to ValidateHtmlFile does
        // not report the previous run's messages again.
        Errors.Clear();
        Warnings.Clear();
        OtherMessages.Clear();

        if (String.IsNullOrWhiteSpace(ResultsString))
        {
            // Treating missing output as "no errors" would let broken pages pass.
            throw new InvalidOperationException("The HTML validator produced no output to parse.");
        }

        var results = JsonConvert.DeserializeObject<dynamic>(ResultsString);
        IList<InternalValidationResult> messages = results.messages.ToObject<List<InternalValidationResult>>();

        foreach (InternalValidationResult internalValidationResult in messages)
        {
            ValidationResult result = new ValidationResult()
            {
                Message = internalValidationResult.message,
                LineNumber = internalValidationResult.linenumber,
                MessageCategory = internalValidationResult.messagecategory,
                MessageType = internalValidationResult.messagetype,
                CharLocation = internalValidationResult.charlocation
            };

            switch (internalValidationResult.messagetype)
            {
                case "ERROR":
                    Errors.Add(result);
                    break;

                case "WARNING":
                    Warnings.Add(result);
                    break;

                default:
                    OtherMessages.Add(result);
                    break;
            }
        }
    }


    /// <summary>
    /// Creates a new temporary file, records its path in <see cref="TempFilePath"/>
    /// and writes <see cref="HtmlToValidate"/> followed by a line terminator to it.
    /// </summary>
    private void WriteTempFile()
    {
        TempFilePath = Path.GetTempFileName();

        // 'using' closes the handle even if the write throws, so the
        // validator process (and the later delete) can open the file.
        using (StreamWriter streamWriter = File.AppendText(TempFilePath))
        {
            streamWriter.WriteLine(HtmlToValidate);
        }
    }
    #endregion
}
}




/// <summary>
/// A single message reported by the validator: its type and category, the
/// message text, and the line / character position it refers to.
/// </summary>
public class ValidationResult
{
    /// <summary>Type label of the message (the validator wrapper sorts on "ERROR" and "WARNING").</summary>
    public string MessageType { get; set; }

    /// <summary>Category label of the message.</summary>
    public string MessageCategory { get; set; }

    /// <summary>The message text.</summary>
    public string Message { get; set; }

    /// <summary>Line number the message refers to.</summary>
    public int LineNumber { get; set; }

    /// <summary>Character position the message refers to.</summary>
    public int CharLocation { get; set; }


    /// <summary>
    /// Formats the result as "TYPE Line N Char M:: message".
    /// </summary>
    public override string ToString()
    {
        object[] parts = { MessageType, LineNumber, CharLocation, Message };
        return String.Format("{0} Line {1} Char {2}:: {3}", parts);
    }
}


public class InternalValidationResult
{
    /*
     * Intermediate holder for the messages that come back from the underlying
     * validator. The property names must be cased exactly like the keys of the
     * underlying JSON object, so the code-analysis naming rules (CA1704 / CA1709)
     * are suppressed on each property instead of renaming them.
     */
    #region Properties...
    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "charlocation")]
    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "charlocation")]
    public int charlocation { get; set; }

    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "linenumber")]
    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "linenumber")]
    public int linenumber { get; set; }

    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "message")]
    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "message")]
    public string message { get; set; }

    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "messagecategory")]
    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "messagecategory")]
    public string messagecategory { get; set; }

    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "messagetype")]
    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "messagetype")]
    public string messagetype { get; set; }
    #endregion
}

使用/测试

   // Minimal document that CanValidHtmlStringReturnNoErrors expects to produce zero errors.
   private const string ValidHtml = "<!DOCType html><html><head></head><body><p>Hello World</p></body></html>";
    // Same document without the closing </html> tag; CanInvalidHtmlStringReturnErrors expects at least one error.
    // NOTE(review): HTML5 syntax permits omitting </html> - confirm the validator actually flags this input.
    private const string BrokenHtml = "<!DOCType html><html><head></head><body><p>Hello World</p></body>";

    /// <summary>
    /// A run over <c>ValidHtml</c> completes and reports no errors.
    /// </summary>
    [TestMethod]
    public void CanValidHtmlStringReturnNoErrors()
    {
        // Arrange
        var validator = new Validator(ValidHtml);

        // Act
        validator.ValidateHtmlFile();

        // Assert
        bool ranToCompletion = validator.HasExecuted;
        Assert.IsTrue(ranToCompletion);

        int errorCount = validator.Errors.Count;
        Assert.IsTrue(errorCount == 0);
    }


    /// <summary>
    /// A run over <c>BrokenHtml</c> completes, reports at least one error, and
    /// the first error's text representation contains "ERROR".
    /// </summary>
    [TestMethod]
    public void CanInvalidHtmlStringReturnErrors()
    {
        // Arrange
        var validator = new Validator(BrokenHtml);

        // Act
        validator.ValidateHtmlFile();

        // Assert
        bool ranToCompletion = validator.HasExecuted;
        Assert.IsTrue(ranToCompletion);

        int errorCount = validator.Errors.Count;
        Assert.IsTrue(errorCount > 0);

        string firstErrorText = validator.Errors[0].ToString();
        Assert.IsTrue(firstErrorText.Contains("ERROR"));
    }

1
最好的HTML5验证器nu checker是用Java编写的,很难与.NET对接。但libtidy可以封装成C++动态链接库,供托管代码调用。他们发布的示例程序稍加调整后,对我来说效果很好。

LibTidy.h:

// C++/CLI ref class that exposes the tidy-based check to managed callers.
public ref class LibTidy
{
public:
    // Takes the HTML text as a managed string and returns a managed string;
    // the implementation in LibTidy.cpp returns the text of tidy's error buffer.
    System::String^ __clrcall Test(System::String^ input);
};

LibTidy.cpp:

// Parses `input` with HTML Tidy, runs clean-and-repair plus diagnostics, and
// returns whatever tidy wrote to its error buffer as a managed string.
// Returns an empty string when the error buffer was never filled.
System::String^ __clrcall LibTidy::Test(System::String^ input)
{
    CStringW cstring(input);

    // Narrow the wide text to a multibyte string for tidyParseString. Two
    // bytes per wide character are reserved; _TRUNCATE makes wcstombs_s cut
    // the text short instead of invoking the invalid-parameter handler when
    // the converted text does not fit.
    const size_t newsizew = (cstring.GetLength() + 1) * 2;
    char* nstringw = new char[newsizew];
    nstringw[0] = '\0';                              // stay a valid C string even if conversion fails
    size_t convertedCharsw = 0;
    wcstombs_s(&convertedCharsw, nstringw, newsizew, cstring, _TRUNCATE);

    TidyBuffer errbuf = { 0 };
    int rc = -1;
    System::String^ diagnostics = System::String::Empty;

    TidyDoc tdoc = tidyCreate();                     // Initialize "document"

    try
    {
        // Every option must be accepted before continuing; previously only
        // the result of the last tidyOptSetBool call was looked at.
        Bool ok = tidyOptSetBool(tdoc, TidyShowInfo, no);
        if (ok)
            ok = tidyOptSetBool(tdoc, TidyQuiet, yes);
        if (ok)
            ok = tidyOptSetBool(tdoc, TidyEmacs, yes);
        if (ok)
            rc = tidySetErrorBuffer(tdoc, &errbuf);  // Capture diagnostics
        if (rc >= 0)
            rc = tidyParseString(tdoc, nstringw);    // Parse the input
        if (rc >= 0)
            rc = tidyCleanAndRepair(tdoc);           // Tidy it up!
        if (rc >= 0)
            rc = tidyRunDiagnostics(tdoc);           // Kvetch

        // Copy the diagnostics into a managed string BEFORE the buffer is
        // freed; reading errbuf.bp after tidyBufFree is a use-after-free.
        if (errbuf.bp != NULL)
            diagnostics = gcnew System::String((char*)errbuf.bp);
    }
    finally
    {
        if (errbuf.allocator != NULL) tidyBufFree(&errbuf);
        tidyRelease(tdoc);
        delete[] nstringw;                           // was leaked on every call
    }

    return diagnostics;
}

0

感谢您的回复。第一个链接中的二进制工具都无法处理HTML5。W3C服务似乎不允许您将SOAP响应与发送任意内容进行验证相结合(即,您只能给他们提供要检查的网站链接)。http://www.htmlvalidator.com/htmlval/developer.html 看起来是目前最好的解决方案。它是命令行驱动的,并且可以处理HTML5。 - Dave

网页内容由 Stack Overflow 提供,点击上面的原文链接可以查看英文原文。