如何使用Apache POI根据行数将Excel文件拆分为多个文件?

7

我是apache poi的新手,我想根据行数将excel文件拆分成多个文件。

例如,data.xlsx有15k行,新文件应该像data_1.xlsx一样有5k行,data_2.xlsx应该有5k-10k行,data_3.xlsx应该有10k-15k行。


请确保您阅读并尝试改进您的问题:http://stackoverflow.com/help/mcve - Thiago Sá
3个回答

8

I've got you.

package com.industries.seanimus;  

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;  

public class ReportSplitter {

    private final String fileName;
    private final int maxRows;

    public ReportSplitter(String fileName, final int maxRows) {

        ZipSecureFile.setMinInflateRatio(0);

        this.fileName = fileName;
        this.maxRows = maxRows;

        try {
            /* Read in the original Excel file. */
            OPCPackage pkg = OPCPackage.open(new File(fileName));
            XSSFWorkbook workbook = new XSSFWorkbook(pkg);
            XSSFSheet sheet = workbook.getSheetAt(0);

            /* Only split if there are more rows than the desired amount. */
            if (sheet.getPhysicalNumberOfRows() >= maxRows) {
                List<SXSSFWorkbook> wbs = splitWorkbook(workbook);
                writeWorkBooks(wbs);
            }
            pkg.close();
        }
        catch (EncryptedDocumentException | IOException | InvalidFormatException e) {
            e.printStackTrace();
        }
    }

    private List<SXSSFWorkbook> splitWorkbook(XSSFWorkbook workbook) {

        List<SXSSFWorkbook> workbooks = new ArrayList<SXSSFWorkbook>();

        SXSSFWorkbook wb = new SXSSFWorkbook();
        SXSSFSheet sh = wb.createSheet();

        SXSSFRow newRow;
        SXSSFCell newCell;

        int rowCount = 0;
        int colCount = 0;

        XSSFSheet sheet = workbook.getSheetAt(0);

        for (Row row : sheet) {
            newRow = sh.createRow(rowCount++);

            /* Time to create a new workbook? */
            if (rowCount == maxRows) {
                workbooks.add(wb);
                wb = new SXSSFWorkbook();
                sh = wb.createSheet();
                rowCount = 0;
            }

            for (Cell cell : row) {
                newCell = newRow.createCell(colCount++);
                newCell = setValue(newCell, cell);

                CellStyle newStyle = wb.createCellStyle();
                newStyle.cloneStyleFrom(cell.getCellStyle());
                newCell.setCellStyle(newStyle);
            }
            colCount = 0;
        }

        /* Only add the last workbook if it has content */
        if (wb.getSheetAt(0).getPhysicalNumberOfRows() > 0) {
            workbooks.add(wb);
        }
        return workbooks;
    }

    /*
     * Grabbing cell contents can be tricky. We first need to determine what
     * type of cell it is.
     */
    private SXSSFCell setValue(SXSSFCell newCell, Cell cell) {
        switch (cell.getCellType()) {
        case Cell.CELL_TYPE_STRING: 
            newCell.setCellValue(cell.getRichStringCellValue().getString());
            break;
        case Cell.CELL_TYPE_NUMERIC:
            if (DateUtil.isCellDateFormatted(cell)) {
                newCell.setCellValue(cell.getDateCellValue());
            } else {
                newCell.setCellValue(cell.getNumericCellValue());
            }
            break;
        case Cell.CELL_TYPE_BOOLEAN:
            newCell.setCellValue(cell.getBooleanCellValue());
            break;
        case Cell.CELL_TYPE_FORMULA:
            newCell.setCellFormula(cell.getCellFormula());
            break;
        default:
            System.out.println("Could not determine cell type");
        }
        return newCell;
    }

    /* Write all the workbooks to disk. */
    private void writeWorkBooks(List<SXSSFWorkbook> wbs) {
        FileOutputStream out;
        try {
            for (int i = 0; i < wbs.size(); i++) {
                String newFileName = fileName.substring(0, fileName.length() - 5);
                out = new FileOutputStream(new File(newFileName + "_" + (i + 1) + ".xlsx"));
                wbs.get(i).write(out);
                out.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args){
        /* This will create a new workbook every 1000 rows. */
        new ReportSplitter("Data.xlsx", 1000);
    }

}

注意事项:

  • 为了编写工作簿,我使用SXSSFWorkbook。它比HSSF或XSSF快得多,因为在写入之前它不会将所有内容保存在内存中(这会导致可怕的垃圾回收问题)。

  • 忙碌开发人员指南是学习Apache POI的好帮手;)

享受吧!


编辑:我已更新代码以复制单元格样式。 关于此事需要注意两点:

  • 复制样式会显著减慢速度。
  • POI创建一个可能变得太大而无法解压缩、抛出检测到Zip炸弹错误的模板文件。 您可以通过更改最小膨胀比率来修复此问题,方法是通过ZipSecureFile.setMinInflateRatio(0)

1
这是如何复制必要的样式和其他相关工件的?还是假定所有内容都使用默认样式? - jmarkmurphy
我更新了代码以复制基本样式(字体、对齐、边框、颜色等)。然而,在Excel工作簿中有很多不同的属性,当处理大量数据时,它们开始创建大的延迟。如果需要特别的内容,我可以添加它,但是在大多数情况下都不会有必要尝试复制所有属性,这可能是一个错误。 - Seanimus

1

感谢您的代码。我只想提供两个小建议。 上面的代码没有复制时间,因此我对setValue代码进行了修改,以添加时间列,这是一个小修改。 基本上,我使用格式部分检查它是否为时间列,对于该列,年份将为1899。 希望能有所帮助:)

 private static SXSSFCell setValue(SXSSFCell newCell, Cell cell) {
        switch (cell.getCellType()) {
        case Cell.CELL_TYPE_STRING: 
            newCell.setCellValue(cell.getRichStringCellValue().getString());
            break;
        case Cell.CELL_TYPE_NUMERIC:
            //System.out.println("The Cell Type is numeric ");
            if (DateUtil.isCellDateFormatted(cell)) {
                System.out.println(cell.getDateCellValue());
                Date c = cell.getDateCellValue();
                SimpleDateFormat simpleDateFormat = new SimpleDateFormat("hh:mm:ss");
                SimpleDateFormat year = new SimpleDateFormat("yyyy");
                String strTime = simpleDateFormat.format(c);
                String strYear=year.format(c);
                if(strYear.equals("1899"))
                {
                    System.out.println(strTime);
                    newCell.setCellValue(DateUtil.convertTime(strTime));    
                }
                else
                {
                    newCell.setCellValue(c);
                }



            } else {
                newCell.setCellValue(cell.getNumericCellValue());
            }
            break;
        case Cell.CELL_TYPE_BOOLEAN:
            newCell.setCellValue(cell.getBooleanCellValue());
            break;
        case Cell.CELL_TYPE_FORMULA:
            newCell.setCellFormula(cell.getCellFormula());
            break;
        default:
            System.out.println("Could not determine cell type");
        }
        return newCell;
    }

-1
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;  


public class SplitFile {

    private final String fileName;
    private final int maxRows;
    private final String path;
    private final String userfilename="";
    public static int filecount;
    public static String taskname;
    public static int rowcounter;

    public SplitFile(String fileName, final int maxRows, String filepath, String userfilename) throws FileNotFoundException     {
        path = filepath;
        taskname = userfilename;
        this.fileName = fileName;
        this.maxRows = maxRows;
        System.out.println("In Constructor");

        File file = new File(fileName);
        FileInputStream inputStream = new FileInputStream(file);
        try {
            /* Read in the original Excel file. */
            //OPCPackage pkg = OPCPackage.open(new File(fileName));
            Workbook workbook = new XSSFWorkbook(inputStream);
            XSSFSheet sheet = (XSSFSheet) workbook.getSheetAt(0);
            System.out.println("Got Sheet");
            /* Only split if there are more rows than the desired amount. */
            if (sheet.getPhysicalNumberOfRows() >= maxRows) {
                List<SXSSFWorkbook> wbs = splitWorkbook(workbook);
                writeWorkBooks(wbs);
            }

        }
        catch (EncryptedDocumentException | IOException e) {
            e.printStackTrace();
        }
    }

    private List<SXSSFWorkbook> splitWorkbook(Workbook workbook) {

        List<SXSSFWorkbook> workbooks = new ArrayList<SXSSFWorkbook>();

        SXSSFWorkbook wb = new SXSSFWorkbook();
        SXSSFSheet sh = (SXSSFSheet) wb.createSheet();

        SXSSFRow newRow,headRow = null;
        SXSSFCell newCell;
        String headCellarr[] = new String[50];

        int rowCount = 0;
        int colCount = 0;
        int headflag = 0;
        int rcountflag = 0;
        int cols = 0;

        XSSFSheet sheet = (XSSFSheet) workbook.getSheetAt(0);
        //sheet.createFreezePane(0, 1);
        int i = 0;
        rowcounter++;
        for (Row row : sheet) {
            if(i==0)
            {
            //headRow = sh.createRow(rowCount++);

            /* Time to create a new workbook? */
            int j = 0;
            for (Cell cell : row) {

                //newCell = headRow.createCell(colCount++);
                headCellarr[j] = cell.toString();
                j++;
            }
            cols = j;
            colCount = 0;
            i++;
            }
            else
            {
                break;
            }

        }

        for (Row row : sheet) {

            //newRow = sh.createRow(rowCount++);
            /* Time to create a new workbook? */
            if (rowCount == maxRows) {
                headflag = 1;
                workbooks.add(wb);
                wb = new SXSSFWorkbook();
                sh = (SXSSFSheet) wb.createSheet();
                rowCount = 0;

            }
                if(headflag == 1)
                {
                    newRow = (SXSSFRow) sh.createRow(rowCount++);
                    headflag = 0;
                    for(int k=0;k<cols;k++)
                    {
                        newCell = (SXSSFCell) newRow.createCell(colCount++);
                        newCell.setCellValue(headCellarr[k]);

                    }
                    colCount = 0;
                    newRow = (SXSSFRow) sh.createRow(rowCount++);

                     for (Cell cell : row) {
                        newCell = (SXSSFCell) newRow.createCell(colCount++);
                        if(cell.getCellType() == Cell.CELL_TYPE_BLANK)
                        {
                            newCell.setCellValue("-");
                        }
                        else
                        {
                        newCell = setValue(newCell, cell);
                        }
                    }
                    colCount = 0;


                }
                else
                {
                    rowcounter++;
                newRow = (SXSSFRow) sh.createRow(rowCount++);
                for(int cn=0; cn<row.getLastCellNum(); cn++) {
                   // If the cell is missing from the file, generate a blank one
                   // (Works by specifying a MissingCellPolicy)
                   Cell cell = row.getCell(cn, Row.CREATE_NULL_AS_BLANK);
                   // Print the cell for debugging
                   //System.out.println("CELL: " + cn + " --> " + cell.toString());
                   newCell = (SXSSFCell) newRow.createCell(cn);
                   if(cell.getCellType() == Cell.CELL_TYPE_NUMERIC)
                   {
                      newCell.setCellValue(cell.getNumericCellValue());
                   }
                   else
                   {
                   newCell.setCellValue(cell.toString());
                   }
            }
                }
              }


        /* Only add the last workbook if it has content */
        if (wb.getSheetAt(0).getPhysicalNumberOfRows() > 0) {
            workbooks.add(wb);
        }
        return workbooks;
    }

    /*
     * Grabbing cell contents can be tricky. We first need to determine what
     * type of cell it is.
     */
    private SXSSFCell setValue(SXSSFCell newCell, Cell cell) {
        switch (cell.getCellType()) {
        case Cell.CELL_TYPE_STRING: 
            newCell.setCellValue(cell.getRichStringCellValue().getString());
            break;
        case Cell.CELL_TYPE_NUMERIC:
            if (DateUtil.isCellDateFormatted(cell)) {
                newCell.setCellValue(cell.getDateCellValue());
            } else {
                //newCell.setCellValue(cell.getNumericCellValue());
                newCell.setCellValue(cell.toString());
            }
            break;
        case Cell.CELL_TYPE_BOOLEAN:
            newCell.setCellValue(cell.getBooleanCellValue());
            break;
        case Cell.CELL_TYPE_FORMULA:
            newCell.setCellFormula(cell.getCellFormula());
            break;
        case Cell.CELL_TYPE_BLANK:
            newCell.setCellValue("-");
            break;
        default:
            System.out.println("Could not determine cell type");
            newCell.setCellValue(cell.toString());

        }
        return newCell;
    }

    /* Write all the workbooks to disk. */
    private void writeWorkBooks(List<SXSSFWorkbook> wbs) {
        FileOutputStream out;
        boolean mdir = new File(path + "/split").mkdir();

        try {
            for (int i = 0; i < wbs.size(); i++) {
                String newFileName = fileName.substring(0, fileName.length() - 5);
                //out = new FileOutputStream(new File(newFileName + "_" + (i + 1) + ".xlsx"));
                out = new FileOutputStream(new File(path + "/split/" + taskname + "_" + (i + 1) + ".xlsx"));
                wbs.get(i).write(out);
                out.close();
                System.out.println("Written" + i);
                filecount++;
            }
            System.out.println(userfilename);
        } catch (IOException e) {
            e.printStackTrace();
        }

    }
    public int sendtotalrows()
    {
        return rowcounter;
    }

    public static void main(String[] args) throws FileNotFoundException{
        // This will create a new workbook every 1000 rows.
        // new Splitter(filename.xlsx, No of split rows, filepath, newfilename);
        new SplitFile("filepath/filename.xlsx", 10000, "filepath", "newfilename");  //No of rows to split: 10 K
    }
}

1
嗨。请在您的代码中添加一些解释。不要放整个类和代码。 - Procrastinator

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接