使用iTextSharp在c#.net中合并多个PDF文件

59

我正在尝试将多个PDF合并为一个。

编译时没有出现错误。我尝试先合并文档,但由于我正在处理表格,所以出现了问题。

这是asp.net的代码后端。

// Preview flow: fill the event's Word template, convert the result to PDF and pass it to the merger.
if (Button.Equals("PreviewWord"))
{
    // Physical paths of the event template and of the generated preview files.
    string templatePath = Server.MapPath("/ERAS/Badges/Template/EventTemp" + EventName + ".doc");
    string singlePreviewPath = Server.MapPath("/ERAS/Badges/Template/PreviewSingle" + EventName + ".doc");
    string pdfPreviewPath = Server.MapPath("/ERAS/Badges/Template/PDFPreviewSingle" + EventName + ".pdf");
    string mergedPreviewPath = Server.MapPath("/ERAS/Badges/Template/PreviewPDFs" + EventName + ".pdf");

    // Only proceed when the template exists on disk and the data fill-in was requested.
    if (System.IO.File.Exists(templatePath) && vulGegevensIn == true)
    {
        // Creates a Word document and fills in names etc. from the database.
        CreateWordDocument(vulGegevensIn, templatePath, singlePreviewPath, false);

        // Saves the single-preview .doc as a PDF at pdfPreviewPath.
        CreatePDF(singlePreviewPath, pdfPreviewPath);

        // Merge step: currently only the one generated preview is handed to the merger.
        string[] previewsSmall = { pdfPreviewPath };
        PDFMergenITextSharp.MergeFiles(mergedPreviewPath, previewsSmall);
    }

    // Further PDFs to merge would be added here.
}

这是PDFMergenITextSharpClass

/// <summary>
/// Helpers for merging PDF files and counting their pages with iTextSharp.
/// </summary>
public static class PDFMergenITextSharp {

    /// <summary>
    /// Concatenates the pages of every file in <paramref name="sourceFiles"/> into a new PDF
    /// written to <paramref name="destinationFile"/>. Each page keeps its own size; pages with a
    /// rotation of 90 or 270 degrees are placed with a rotating transformation matrix.
    /// </summary>
    /// <param name="destinationFile">Path of the merged PDF; an existing file is overwritten.</param>
    /// <param name="sourceFiles">Paths of the PDFs to merge, in output order.</param>
    /// <remarks>
    /// This method is best-effort: failures are not thrown to the caller (as before), but they are
    /// now reported through <see cref="System.Diagnostics.Trace"/> instead of being discarded.
    /// NOTE(review): pages are imported through <c>PdfWriter</c>; reviewers of this code suggest
    /// <c>PdfCopy</c> for merging -- confirm whether form fields must survive the merge.
    /// </remarks>
    public static void MergeFiles(string destinationFile, string[] sourceFiles)
    {
        // Nothing to merge. The previous implementation failed on sourceFiles[0] here and
        // swallowed the error, so producing no output keeps the observable behavior.
        if (sourceFiles == null || sourceFiles.Length == 0)
        {
            return;
        }

        // Every reader we open is tracked so that it can be closed in the finally block.
        System.Collections.Generic.List<PdfReader> openReaders = new System.Collections.Generic.List<PdfReader>();
        try
        {
            // The first reader is opened before the output file is created so that an
            // unreadable first source does not leave an empty destination file behind.
            PdfReader reader = new PdfReader(sourceFiles[0]);
            openReaders.Add(reader);

            using (FileStream output = new FileStream(destinationFile, FileMode.Create))
            {
                // The document starts with the size of the first page of the first source.
                Document document = new Document(reader.GetPageSizeWithRotation(1));
                PdfWriter writer = PdfWriter.GetInstance(document, output);
                document.Open();
                PdfContentByte cb = writer.DirectContent;

                for (int f = 0; f < sourceFiles.Length; f++)
                {
                    if (f > 0)
                    {
                        reader = new PdfReader(sourceFiles[f]);
                        openReaders.Add(reader);
                    }

                    int pageCount = reader.NumberOfPages;
                    for (int i = 1; i <= pageCount; i++)
                    {
                        // The page size must be set before NewPage() so it applies to the new page.
                        document.SetPageSize(reader.GetPageSizeWithRotation(i));
                        document.NewPage();

                        PdfImportedPage page = writer.GetImportedPage(reader, i);
                        int rotation = reader.GetPageRotation(i);
                        if (rotation == 90 || rotation == 270)
                        {
                            // Rotating matrix, shifted by the rotated page height.
                            cb.AddTemplate(page, 0, -1f, 1f, 0, 0, reader.GetPageSizeWithRotation(i).Height);
                        }
                        else
                        {
                            cb.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
                        }
                    }
                }

                // Closed only on success; on failure the using block still releases the file handle.
                document.Close();
            }
        }
        catch (Exception e)
        {
            // Keep the no-throw contract, but leave a trace of what went wrong.
            System.Diagnostics.Trace.TraceError("Merging PDFs into '{0}' failed: {1}", destinationFile, e);
        }
        finally
        {
            // Readers are closed last, after the document has been written
            // (presumably the writer may still read page content from them until then).
            foreach (PdfReader opened in openReaders)
            {
                opened.Close();
            }
        }
    }

    /// <summary>
    /// Returns the number of pages of the PDF at <paramref name="strFileName"/>.
    /// </summary>
    /// <param name="strFileName">Path of the PDF file to inspect.</param>
    /// <returns>The page count reported by the reader.</returns>
    public static int CountPageNo(string strFileName)
    {
        PdfReader reader = new PdfReader(strFileName);
        try
        {
            return reader.NumberOfPages;
        }
        finally
        {
            // Release the reader even when reading the page count fails.
            reader.Close();
        }
    }
}

4
请使用PdfCopy代替PdfWriter。有许多示例和相关问题可以参考。 - Mark Storer
1
@Liquid -- CreatePDF(SinglePreview, PDFPreview); 请问你是如何从文档中创建PDF的?如果您能提供一些具体细节,那将非常有帮助。我希望您正在使用iTextSharp进行文档转换成PDF。 - kbvishnu
7个回答

73

我找到了答案:

不要使用第二种方法,而是将更多的文件添加到第一个输入文件数组中。

/// <summary>
/// Copies every page (and the AcroForm, when present) of each file in
/// <paramref name="fileNames"/> into one PDF written to <paramref name="outFile"/>.
/// </summary>
/// <param name="fileNames">Paths of the PDFs to merge, in output order.</param>
/// <param name="outFile">Path of the merged PDF; an existing file is overwritten.</param>
/// <remarks>
/// NOTE(review): <c>PdfCopy.CopyAcroForm</c> is reported as unavailable in some iTextSharp
/// versions -- confirm against the version this project references.
/// </remarks>
public static void CombineMultiplePDFs(string[] fileNames, string outFile)
{
    // step 1: creation of a document-object
    Document document = new Document();

    // The output stream is disposed at the end of the using block, also on failure.
    using (FileStream newFileStream = new FileStream(outFile, FileMode.Create))
    {
        // step 2: we create a writer that listens to the document
        PdfCopy writer = new PdfCopy(document, newFileStream);

        // step 3: we open the document
        document.Open();

        foreach (string fileName in fileNames)
        {
            // we create a reader for a certain document
            PdfReader reader = new PdfReader(fileName);
            try
            {
                reader.ConsolidateNamedDestinations();

                // step 4: we add content (page numbers are 1-based)
                for (int i = 1; i <= reader.NumberOfPages; i++)
                {
                    PdfImportedPage page = writer.GetImportedPage(reader, i);
                    writer.AddPage(page);
                }

                if (reader.AcroForm != null)
                {
                    writer.CopyAcroForm(reader);
                }
            }
            finally
            {
                // Close the reader even when copying one of its pages throws.
                reader.Close();
            }
        }

        // step 5: we close the document and writer
        writer.Close();
        document.Close();
    }
}
    

2
@liquid - 抱歉,您能说明一下您用了哪些参考资料使其工作吗? - cgraham720
35
"PdfCopy没有包含CopyAcroForm的定义"。 - Christine
2
不得不将代码中的 PRAcroForm form = reader.AcroForm; 改为 PrAcroForm form = reader.AcroForm;(小写字母'r'代替大写字母,否则会出现错误 - 但是这个修改并没有在此部分生效...) - misanthrop
1
@misanthrop 这次编辑没有被考虑进去,因为该对象的类名是大写字母 R 的 "PRAcroForm"。但我不知道您使用的是哪个 iTextsharp 版本。不过,我很高兴这能帮助到您。 - Liquid
3
我们正在使用 iTextSharp.LGPLv2.Core,它实际上是 iTextSharp(v4.1.6) 的非官方移植版本,也许这与此有关...也许这条评论也能帮助其他人 =) - misanthrop
显示剩余4条评论

48

我在这个网站上找到了一个非常好的解决方案:http://weblogs.sqlteam.com/mladenp/archive/2014/01/10/simple-merging-of-pdf-documents-with-itextsharp-5-4-5.aspx

我按照以下方式更新了方法:

/// <summary>
/// Merges the given PDF files, in order, into <paramref name="targetFileName"/>.
/// </summary>
/// <param name="fileNames">Paths of the PDFs to merge.</param>
/// <param name="targetFileName">Path of the merged PDF; an existing file is overwritten.</param>
/// <returns>
/// <c>true</c> when every file was added and the document was closed without error;
/// <c>false</c> when adding a file or closing the document failed.
/// </returns>
public static bool MergePdfs(IEnumerable<string> fileNames, string targetFileName)
{
    bool success = true;
    using (FileStream stream = new(targetFileName, FileMode.Create))
    {
        Document document = new();
        PdfCopy pdf = new(document, stream);

        try
        {
            document.Open();
            foreach (string file in fileNames)
            {
                PdfReader reader = new(file);
                try
                {
                    pdf.AddDocument(reader);
                }
                finally
                {
                    // Each reader is closed exactly once, whether or not AddDocument succeeded.
                    reader.Close();
                }
            }
        }
        catch (Exception)
        {
            success = false;
        }
        finally
        {
            try
            {
                document.Close();
            }
            catch (Exception)
            {
                // Closing can itself fail (for example when no page was added). Previously that
                // exception escaped from the finally block instead of yielding a false result.
                success = false;
            }
        }
    }

    return success;
}

7
我更喜欢这个解决方案,因为它不涉及已被弃用的“CopyAcroForm”功能,在最新版本的“itextsharp”中不再可用。 - Matze
在我这里完美地运行,使用了几个文档和最新版本的nuget上可用的itextsharp库 :) - AFract
据我所知,几乎没有理由不使用PdfSmartCopy而选择PdfCopy。至少对我来说,PDF文件大小的节省非常显著。 - Bacon Bits

14

使用iTextSharp合并PDF的代码

/// <summary>
/// Merges the PDFs listed in <paramref name="InFiles"/>, in order, into <paramref name="OutFile"/>.
/// </summary>
/// <param name="InFiles">Paths of the PDFs to merge.</param>
/// <param name="OutFile">Path of the merged PDF; an existing file is overwritten.</param>
/// <param name="deleteSourceFiles">
/// When <c>true</c> (the default, matching the historical behavior of this method) each source
/// file is DELETED from disk after its pages were copied. Pass <c>false</c> to keep the inputs.
/// </param>
public static void Merge(List<String> InFiles, String OutFile, bool deleteSourceFiles = true)
{
    using (FileStream stream = new FileStream(OutFile, FileMode.Create))
    using (Document doc = new Document())
    using (PdfCopy pdf = new PdfCopy(doc, stream))
    {
        doc.Open();

        foreach (String file in InFiles)
        {
            PdfReader reader = new PdfReader(file);
            try
            {
                // Page numbers are 1-based.
                for (int pageNumber = 1; pageNumber <= reader.NumberOfPages; pageNumber++)
                {
                    pdf.AddPage(pdf.GetImportedPage(reader, pageNumber));
                }

                pdf.FreeReader(reader);
            }
            finally
            {
                // Close the reader even when copying fails, so the source file is not left locked.
                reader.Close();
            }

            // Only reached when all pages of this file were copied successfully.
            if (deleteSourceFiles)
            {
                File.Delete(file);
            }
        }
    }
}

29
若有人只是简单复制你的代码而不认真阅读可能会遇到麻烦:并非每种合并情况下源文件都需要被删除! - mkl
这仍然有效。确保你理解File.Delete(file)的作用。 - Nandostyle

7
使用iTextSharp.dll
// On page load, merges the two sample PDFs from C:\ENROLLDOCS into New1.pdf.
protected void Page_Load(object sender, EventArgs e)
{
    // Comma-separated list of the input files, split into individual paths.
    string inputList = @"C:\ENROLLDOCS\A1.pdf,C:\ENROLLDOCS\A2.pdf";
    string[] inputPaths = inputList.Split(',');

    string mergedPath = @"C:\ENROLLDOCS\New1.pdf";
    MergeFiles(mergedPath, inputPaths);
}
/// <summary>
/// Merges the given source PDFs, in order, into <paramref name="destinationFile"/> and writes
/// progress messages to the HTTP response.
/// </summary>
/// <param name="destinationFile">Path of the merged PDF; an existing file is deleted first.</param>
/// <param name="sourceFiles">
/// Paths of the PDFs to merge. Null or blank entries are skipped; any number of files is accepted
/// (the previous implementation failed for anything other than exactly two non-blank entries).
/// </param>
/// <remarks>
/// Each output page takes the size of its source page rather than a fixed A4 size, so content of
/// larger pages is no longer cut off. Errors raised while merging are written to the response.
/// </remarks>
public void MergeFiles(string destinationFile, string[] sourceFiles)
{
    if (System.IO.File.Exists(destinationFile))
    {
        System.IO.File.Delete(destinationFile);
    }

    // Keep only usable entries; this replaces the former fixed-size (length 2) scratch arrays.
    System.Collections.Generic.List<string> inputs = new System.Collections.Generic.List<string>();
    if (sourceFiles != null)
    {
        foreach (string candidate in sourceFiles)
        {
            if (candidate != null && candidate.Trim() != "")
            {
                inputs.Add(candidate);
            }
        }
    }

    if (inputs.Count == 0)
    {
        Response.Write("No source files to merge.");
        return;
    }

    // Every reader we open is tracked so that it can be closed in the finally block.
    System.Collections.Generic.List<PdfReader> openReaders = new System.Collections.Generic.List<PdfReader>();
    try
    {
        PdfReader reader = new PdfReader(inputs[0]);
        openReaders.Add(reader);

        using (FileStream output = new FileStream(destinationFile, FileMode.Create))
        {
            // Start with the size of the first source page instead of a hard-coded A4.
            Document document = new Document(reader.GetPageSizeWithRotation(1));
            PdfWriter writer = PdfWriter.GetInstance(document, output);

            document.Open();
            PdfContentByte cb = writer.DirectContent;

            for (int f = 0; f < inputs.Count; f++)
            {
                if (f > 0)
                {
                    reader = new PdfReader(inputs[f]);
                    openReaders.Add(reader);
                }

                int n = reader.NumberOfPages;
                Response.Write("There are " + n + " pages in the original file.");

                for (int i = 1; i <= n; i++)
                {
                    // The page size must be set before NewPage() so it applies to the new page.
                    document.SetPageSize(reader.GetPageSizeWithRotation(i));
                    document.NewPage();

                    PdfImportedPage page = writer.GetImportedPage(reader, i);
                    int rotation = reader.GetPageRotation(i);
                    if (rotation == 90 || rotation == 270)
                    {
                        // Rotating matrix, shifted by the rotated page height.
                        cb.AddTemplate(page, 0, -1f, 1f, 0, 0, reader.GetPageSizeWithRotation(i).Height);
                    }
                    else
                    {
                        cb.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
                    }
                    Response.Write("\n Processed page " + i);
                }
            }

            // Report success only after the document was actually written out.
            document.Close();
            Response.Write("Success");
        }
    }
    catch (Exception e)
    {
        Response.Write(e.Message);
    }
    finally
    {
        // Readers are closed last, after the document has been written.
        foreach (PdfReader opened in openReaders)
        {
            opened.Close();
        }
    }
}

使用此时,我遇到了一个问题,我的内容被截断了。 - Aneeq Azam Khan

4
合并多个PDF文件的字节数组:
    /// <summary>
    /// Merges several in-memory PDF files into one PDF.
    /// </summary>
    /// <param name="pdfFiles">The PDF files to merge, in output order, each as a byte array.</param>
    /// <returns>
    /// The merged PDF when more than one file is given; the single input array itself (not a
    /// copy) when exactly one file is given; <c>null</c> when the list is empty.
    /// </returns>
    public static byte[] MergePDFs(List<byte[]> pdfFiles)
    {
        if (pdfFiles.Count == 0)
        {
            return null;
        }

        if (pdfFiles.Count == 1)
        {
            return pdfFiles[0];
        }

        using (MemoryStream msFinalPdf = new MemoryStream())
        {
            Document pdfContainer = new Document();
            PdfSmartCopy pdfCopy = new PdfSmartCopy(pdfContainer, msFinalPdf);

            pdfContainer.Open();

            foreach (byte[] pdfBytes in pdfFiles)
            {
                // One reader per input; the first file is no longer parsed twice.
                PdfReader reader = new PdfReader(pdfBytes);
                try
                {
                    // Page numbers are 1-based.
                    for (int i = 1; i <= reader.NumberOfPages; i++)
                    {
                        pdfCopy.AddPage(pdfCopy.GetImportedPage(reader, i));
                    }
                    pdfCopy.FreeReader(reader);
                }
                finally
                {
                    // Every reader is closed, not only the last one.
                    reader.Close();
                }
            }

            pdfCopy.Close();
            pdfContainer.Close();

            return msFinalPdf.ToArray();
        }
    }

1

我没有在其他地方看到过这个解决方案。据一位网友说,正确的做法是使用 copyPagesTo()(在 .NET 版本中为 CopyPagesTo())。我测试过这个方法,它确实有效。不过实际效果可能因具体情况而异。祝好运。

    /// <summary>
    /// Merges the given PDF files, in order, into <paramref name="OutputFile"/> using
    /// <c>PdfDocument.CopyPagesTo</c> (iText 7 API).
    /// </summary>
    /// <param name="lststrInputFiles">Paths of the PDFs to merge.</param>
    /// <param name="OutputFile">Path of the merged PDF.</param>
    /// <param name="iPageCount">Receives the number of pages in the merged document (0 on failure).</param>
    /// <param name="strError">Receives a description when the input list is null or empty; otherwise empty.</param>
    /// <returns>
    /// <c>true</c> when the merge completed; <c>false</c> when no input files were supplied (in
    /// that case no output file is created). Errors raised while reading or writing PDFs still
    /// propagate as exceptions, as before.
    /// </returns>
    public static bool MergePDFs(List<string> lststrInputFiles, string OutputFile, out int iPageCount, out string strError)
    {
        strError = string.Empty;
        iPageCount = 0;

        if (lststrInputFiles == null || lststrInputFiles.Count == 0)
        {
            strError = "No input files were supplied.";
            return false;
        }

        PdfDocument pdfDocumentOut = new PdfDocument(new PdfWriter(OutputFile));
        try
        {
            // All inputs, including the first, go through the same copy step.
            foreach (string inputFile in lststrInputFiles)
            {
                PdfDocument pdfDocumentIn = new PdfDocument(new PdfReader(inputFile));
                try
                {
                    pdfDocumentIn.CopyPagesTo(1, pdfDocumentIn.GetNumberOfPages(), pdfDocumentOut);
                }
                finally
                {
                    // Source documents were never closed before; release each one after copying.
                    pdfDocumentIn.Close();
                }
            }

            iPageCount = pdfDocumentOut.GetNumberOfPages();
        }
        catch
        {
            // Release the output on failure. A secondary error while closing a partially written
            // document is deliberately ignored so that the original exception reaches the caller.
            try
            {
                pdfDocumentOut.Close();
            }
            catch (Exception)
            {
            }
            throw;
        }

        pdfDocumentOut.Close();

        return true;
    }

2
你的解决方案适用于iText 7,而问题和其他答案关注的是iText 5。 - mkl
1
这是正确的,也是我发布它的原因。我发现几乎所有其他答案都已经过时了,但我应该指出其中的区别。我发现的每一个答案都是针对iText 5的,所以我认为发布一个适用于当前iText 7的答案是个好主意。 - Miguelito

0
请阅读这篇文章《How to Merge Multiple PDF Files Into Single PDF Using Itextsharp in C#》,我在其中详细解释了如何在C#中使用iTextSharp将多个PDF文件合并为单个PDF文件。实现如下:
// Exports one Crystal Reports PDF per grid row, merges the exported files into a single PDF,
// opens the merged file and finally deletes the per-row input files.
try
{
    // Paths of the per-row PDFs, in export order. A list is used instead of a comma-joined
    // string so that paths containing commas are handled correctly.
    System.Collections.Generic.List<string> exportedFiles = new System.Collections.Generic.List<string>();

    // Create one report per row of the gridview control
    for (int j = 0; j < Gridview1.Rows.Count; j++)
    {
        // Use the JobId of the CURRENT row; the loop previously read row 0 on every iteration.
        int jobId = Convert.ToInt32(Gridview1.Rows[j]["JobId"]);

        // Return datatable for data
        DataTable dtDetail = new My_GlobalClass().GetDataTable(jobId);
        if (dtDetail.Rows.Count > 0)
        {
            // Create Object of ReportDocument
            ReportDocument cryRpt = new ReportDocument();
            // Store path of .rpt file
            string StrPath = Application.StartupPath + "\\RPT";
            StrPath = StrPath + "\\";
            StrPath = StrPath + "rptCodingvila_Articles_Report.rpt";
            cryRpt.Load(StrPath);
            // Assign Report Datasource
            cryRpt.SetDataSource(dtDetail);
            // Assign Reportsource to Report viewer
            CryViewer.ReportSource = cryRpt;
            CryViewer.Refresh();
            // Store path/name of pdf file one by one
            string StrPathN = Application.StartupPath + "\\Temp" + "\\Codingvila_Articles_Report" + jobId.ToString() + ".Pdf";
            exportedFiles.Add(StrPathN);
            // Export Report in PDF
            cryRpt.ExportToDisk(CrystalDecisions.Shared.ExportFormatType.PortableDocFormat, StrPathN);
        }
    }

    if (exportedFiles.Count > 0)
    {
        string mergedFile = Application.StartupPath + "\\Temp" + "\\Codingvila_Articles_Report.pdf";

        // Remove a merged file left over from a previous run
        if (System.IO.File.Exists(mergedFile))
        {
            System.IO.File.Delete(mergedFile);
        }

        string[] files = exportedFiles.ToArray();
        // Merge Multiple PDF File
        MargeMultiplePDF(files, mergedFile);
        // Open Created/Merged PDF Output File
        Process.Start(mergedFile);
        // Check and Delete Input file
        foreach (string item in files)
        {
            if (System.IO.File.Exists(item))
            {
                System.IO.File.Delete(item);
            }
        }
    }
}
catch (Exception ex)
{
    XtraMessageBox.Show(ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
}

创建合并PDF的函数

/// <summary>
/// Copies every page (and the AcroForm, when present) of each file in
/// <paramref name="PDFfileNames"/> into one PDF written to <paramref name="OutputFile"/>.
/// </summary>
/// <param name="PDFfileNames">Paths of the PDFs to merge, in output order.</param>
/// <param name="OutputFile">Path of the merged PDF; an existing file is overwritten.</param>
/// <remarks>
/// NOTE(review): <c>PdfCopy.CopyAcroForm</c> is reported as unavailable in some iTextSharp
/// versions -- confirm against the version this project references.
/// </remarks>
public static void MargeMultiplePDF(string[] PDFfileNames, string OutputFile)
{
    iTextSharp.text.Document PDFdoc = new iTextSharp.text.Document();
    using (System.IO.FileStream MyFileStream = new System.IO.FileStream(OutputFile, System.IO.FileMode.Create))
    {
        // A constructor call never yields null, so the former null check on the writer was dead code.
        iTextSharp.text.pdf.PdfCopy PDFwriter = new iTextSharp.text.pdf.PdfCopy(PDFdoc, MyFileStream);
        PDFdoc.Open();

        foreach (string fileName in PDFfileNames)
        {
            iTextSharp.text.pdf.PdfReader PDFreader = new iTextSharp.text.pdf.PdfReader(fileName);
            try
            {
                PDFreader.ConsolidateNamedDestinations();

                // Page numbers are 1-based.
                for (int i = 1; i <= PDFreader.NumberOfPages; i++)
                {
                    iTextSharp.text.pdf.PdfImportedPage page = PDFwriter.GetImportedPage(PDFreader, i);
                    PDFwriter.AddPage(page);
                }

                if (PDFreader.AcroForm != null)
                {
                    PDFwriter.CopyAcroForm(PDFreader);
                }
            }
            finally
            {
                // Close the reader even when copying one of its pages throws.
                PDFreader.Close();
            }
        }

        PDFwriter.Close();
        PDFdoc.Close();
    }
}

1
对于与iText(Sharp) 2.x、4.x和5.x合并,使用基于PdfCopy的解决方案通常比使用基于PdfWriter的解决方案更好。 - mkl

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接