如何使用pdf.js库呈现整个PDF文档?

8
我尝试使用 pdf.js 库来渲染 PDF 文档。我只了解 JavaScript 的基础知识,对 Promise 并不熟悉,因此起初我按照“Render .pdf to single Canvas using pdf.js and ImageData”页面上(第二个答案)的建议操作。但结果是所有页面的文字都没有显示:图片和颜色正常,却没有一行文字。我也尝试了其他一些教程,但要么得到相同的结果,要么文档完全无法显示。现在,我的代码如下(几乎与教程完全相同):
/**
 * Renders every page of a PDF onto one shared, detached canvas, captures each
 * rendered page as a data URL, and finally appends one <img> per captured
 * page to document.body.
 *
 * Pages are processed strictly in sequence: the next page is requested only
 * after the previous page's render has completed.
 *
 * @param {string} pid - Identifier inserted into the '/search/nimg/IMG_FULL/'
 *     URL from which the PDF is loaded.
 * @param {string} pageUrl - Accepted but never read by this function.
 */
function loadPDFJS(pid, pageUrl){

    PDFJS.disableWorker = true;
    PDFJS.workerSrc = 'pdfjs/build/pdf.worker.js';

    var RENDER_SCALE = 1.5;
    var sharedCanvas = document.createElement('canvas');
    var sharedCtx = sharedCanvas.getContext('2d');
    var snapshots = [];
    var pageNumber = 1;

    var pdfUrl = '/search/nimg/IMG_FULL/' + pid + '#page=1';

    // Loads one captured page into an <img>. Once the image has loaded it is
    // painted back onto the shared canvas and then attached to the body.
    function showSnapshot(dataUrl) {
        var image = new Image;
        image.onload = function () {
            var target = sharedCtx.canvas;
            sharedCtx.drawImage(this, 0, 0, target.width, target.height);
            document.body.appendChild(this);
        };
        image.src = dataUrl;
    }

    PDFJS.getDocument(pdfUrl).then(function (pdf) {

        // Renders the page numbered `pageNumber`, stores its snapshot, then
        // either moves on to the next page or displays all snapshots.
        function renderNextPage() {
            pdf.getPage(pageNumber).then(function (page) {
                var viewport = page.getViewport(RENDER_SCALE);

                // Resizing the canvas also clears whatever was drawn before.
                sharedCanvas.height = viewport.height;
                sharedCanvas.width = viewport.width;

                page.render({
                    canvasContext: sharedCtx,
                    viewport: viewport
                }).then(function () {
                    snapshots.push(sharedCanvas.toDataURL());

                    if (pageNumber < pdf.numPages) {
                        pageNumber++;
                        renderNextPage();
                        return;
                    }

                    snapshots.forEach(function (dataUrl) {
                        showSnapshot(dataUrl);
                    });
                });
            });
        }

        if (pageNumber <= pdf.numPages) {
            renderNextPage();
        }
    });
}

尝试将画布添加到DOM/文档中。 - async5
4个回答

5

感谢 @user3913960,你的思路对我很有帮助。我在你的代码中发现了一些问题,并做了修复。下面是修复后的代码:

/**
 * Renders every page of a PDF into its own <canvas>, appended in page order
 * to the element with id "pdf-container".
 *
 * Pages are rendered strictly one after another: the next page is requested
 * only after the previous page's render has finished.
 *
 * @param {string} pageUrl - URL of the PDF document, passed to
 *     PDFJS.getDocument.
 * @param {function(Array)} [onComplete] - Optional callback invoked once,
 *     after the last page has finished rendering, with the array of canvases
 *     (one per page, in page order).
 */
function loadPDFJS(pageUrl, onComplete) {
    PDFJS.workerSrc = 'resources/js/pdfjs/pdf.worker.js';
    var pages = [];
    var container = document.getElementById('pdf-container');

    // Single place where load/render failures are surfaced instead of being
    // lost as unhandled promise rejections.
    function reportFailure(reason) {
        console.error('Unable to render PDF "' + pageUrl + '":', reason);
    }

    if (!container) {
        // Without a container there is nowhere to put the canvases, so do not
        // download the document at all.
        reportFailure(new Error('Element with id "pdf-container" was not found'));
        return;
    }

    // Renders page `pageNumber` and then, recursively, every following page.
    // Resolves with the array of all canvases once the last page is done.
    function renderPage(pdf, pageNumber) {
        return pdf.getPage(pageNumber).then(function (page) {
            var canvas = document.createElement('canvas');
            // Link: https://dev59.com/gmcs5IYBdhLWcg3wOBRC#13039183
            // Scale the page so that its width equals the screen width
            // (window.screen.width), based on its width at scale 1.0.
            var viewport = page.getViewport(window.screen.width / page.getViewport(1.0).width);
            // Attach the canvas before rendering so pages appear in order.
            container.appendChild(canvas);
            var context = canvas.getContext('2d');
            canvas.height = viewport.height;
            canvas.width = viewport.width;

            return page.render({
                canvasContext: context,
                viewport: viewport
            }).then(function () {
                // Record every page, including the last one.
                pages.push(canvas);
                if (pageNumber < pdf.numPages) {
                    return renderPage(pdf, pageNumber + 1);
                }
                return pages;
            });
        });
    }

    PDFJS.getDocument(pageUrl).then(function (pdf) {
        return renderPage(pdf, 1);
    }).then(function (renderedPages) {
        if (typeof onComplete === 'function') {
            onComplete(renderedPages);
        }
    }, reportFailure);
}
// Example invocation; the argument is the URL handed to PDFJS.getDocument.
loadPDFJS("somepdffilenamehere.pdf");

5

我刚才重新审视了我的代码,并从头开始。我把它简化了,最终让它能够正常工作。现在看起来像这样:

// Looked up once, when this script is evaluated. Note that loadPDFJS below
// does not read this variable; it performs its own getElementById lookup.
var canvasContainer = document.getElementById('pdfImageImg');
/**
 * Renders every page of a PDF into its own <canvas>, scaled to a height of
 * 700 pixels, and appends all canvases in page order to the element with id
 * "pdfImageImg" once the last page has finished rendering.
 *
 * Pages are rendered one after another; the next page is requested only
 * after the previous page's render has completed.
 *
 * @param {string} pid - Identifier inserted into the '/search/nimg/IMG_FULL/'
 *     URL from which the PDF is loaded.
 * @param {string} pageUrl - Accepted but never read by this function.
 */
function loadPDFJS(pid, pageUrl){

    PDFJS.workerSrc = 'pdfjs/build/pdf.worker.js';

    var TARGET_HEIGHT = 700;
    var pages = [];
    var url = '/search/nimg/IMG_FULL/' + pid + '#page=1';

    PDFJS.getDocument(url).then(function (pdf) {

        // Renders page `pageNumber` into a fresh canvas and then, recursively,
        // every following page.
        function renderPage(pageNumber) {
            return pdf.getPage(pageNumber).then(function (page) {
                // Derive the scale from the page's height at scale 1.
                var scale = TARGET_HEIGHT / page.getViewport(1).height;
                var scaledViewport = page.getViewport(scale);

                var canvas = document.createElement('canvas');
                var context = canvas.getContext('2d');
                canvas.height = scaledViewport.height;
                canvas.width = scaledViewport.width;

                return page.render({
                    canvasContext: context,
                    viewport: scaledViewport
                }).then(function () {
                    // Store the canvas BEFORE deciding whether to continue, so
                    // the final page (or the only page) is not dropped.
                    pages.push(canvas);
                    if (pageNumber < pdf.numPages) {
                        return renderPage(pageNumber + 1);
                    }
                });
            });
        }

        return renderPage(1);
    }).then(function () {
        // Resolved at render-completion time rather than at script load, so
        // it works even if this script ran before the element existed.
        var container = document.getElementById('pdfImageImg');
        for (var i = 0; i < pages.length; i++) {
            container.appendChild(pages[i]);
        }
    }).then(null, function (reason) {
        console.error('Unable to render PDF "' + url + '":', reason);
    });
}

你已经在第一行声明了PDF DOM容器(canvasContainer),为什么在for循环中不使用它,而是再次使用getElementById? - llioor

3

pdfjs-dist 库包含了用于构建 PDF 查看器的各个组件。您可以使用 PDFPageView 来呈现所有页面。以下示例基于 https://github.com/mozilla/pdf.js/blob/master/examples/components/pageviewer.html:

var url = "https://cdn.mozilla.net/pdfjs/tracemonkey.pdf";
var container = document.getElementById('container');

// Load the document, then build and draw one PDFPageView per page. The views
// are chained on a single promise so that pages are processed one at a time.
PDFJS.getDocument(url).then(function (doc) {
  var SCALE = 1.0;

  // Creates the page view for the zero-based `index`, binds the loaded page
  // to it and draws it. Resolves when drawing has finished.
  function drawPageView(index) {
    return doc.getPage(index + 1).then(function (pdfPage) {
      var view = new PDFJS.PDFPageView({
        container: container,
        id: index,
        scale: SCALE,
        defaultViewport: pdfPage.getViewport(SCALE),
        // Text and annotation layers are enabled through these factories.
        textLayerFactory: new PDFJS.DefaultTextLayerFactory(),
        annotationLayerFactory: new PDFJS.DefaultAnnotationLayerFactory()
      });
      // Associate the actual page with the view before drawing it.
      view.setPdfPage(pdfPage);
      return view.draw();
    });
  }

  var chain = Promise.resolve();
  for (var index = 0; index < doc.numPages; index++) {
    // bind() fixes the index for this step; the previous step's resolved
    // value arrives as a second argument and is ignored.
    chain = chain.then(drawPageView.bind(null, index));
  }
  return chain;
});
/* Draw a separator line above every direct child of #container except the first. */
#container > *:not(:first-child) {
  border-top: solid 1px black; 
}
<!-- Stylesheet and scripts from the pdfjs-dist package, loaded from npmcdn.com. -->
<link href="https://npmcdn.com/pdfjs-dist/web/pdf_viewer.css" rel="stylesheet"/>
<script src="https://npmcdn.com/pdfjs-dist/web/compatibility.js"></script>
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
<script src="https://npmcdn.com/pdfjs-dist/web/pdf_viewer.js"></script>

<!-- Element the script finds via getElementById('container') and passes to PDFPageView as its container. -->
<div id="container" class="pdfViewer singlePageView"></div>

另请参阅:《如何使用 PDF.JS 显示整个 PDF 文件而非单页?》


0
我使用了以下代码来呈现一个有多个页面的PDF。
PDFJS.disableWorker = true; // due to CORS

// Shared state: one detached canvas reused for every page, the captured
// data URLs, and the number of the page currently being rendered.
var canvas = document.createElement('canvas');
var ctx = canvas.getContext('2d');
var pages = [];
var currentPage = 1;
var url = 'your_pdf.pdf';

PDFJS.getDocument(url).then(function (pdf) {

  // Renders the page numbered `currentPage` onto the shared canvas, captures
  // it, and then either advances to the next page or displays every capture.
  function renderCurrentPage() {
    pdf.getPage(currentPage).then(function (page) {
      var viewport = page.getViewport(1);
      canvas.height = viewport.height;
      canvas.width = viewport.width;

      // render() hands back a thenable that settles when drawing is done.
      page.render({
        canvasContext: ctx,
        viewport: viewport
      }).then(function () {
        // Keep the rendered page as a data URL.
        pages.push(canvas.toDataURL());

        if (currentPage < pdf.numPages) {
          currentPage++;
          renderCurrentPage();
          return;
        }

        // Every page has been captured: show them all.
        pages.forEach(function (unusedDataUrl, index) {
          drawPage(index);
        });
      });
    });
  }

  if (currentPage <= pdf.numPages) {
    renderCurrentPage();
  }
});

/**
 * Loads the captured page at `pages[index]` into a new <img>. When the image
 * has loaded it is painted onto the shared canvas context `ctx` at the
 * canvas's full size and appended to document.body; every image except the
 * first is given `display: inline-block`.
 *
 * @param {number} index - Position of the data URL within `pages`.
 * @param {Function} callback - Accepted but never invoked by this function.
 */
function drawPage(index, callback) {
  var pageImage = new Image();
  pageImage.onload = function () {
    var target = ctx.canvas;
    ctx.drawImage(this, 0, 0, target.width, target.height);
    if (index > 0) {
      pageImage.style.display = 'inline-block';
    }
    document.body.appendChild(pageImage);
  };
  // Assigning src starts loading the data URL.
  pageImage.src = pages[index];
}

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接