如何使用Apache POI处理空行?

3
我将使用Apache POI来读取xlsx文件,它能够正常工作。当遇到空行时,我该如何处理呢?我的文件包含500行,但是显示为105667行,其余行都为空。
代码如下:
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

/**
 *
 * @author SAMEEK
 */
public class readXLSXFile {
public int getNumberOfColumn(String fileName, int sheetIndex) throws FileNotFoundException, IOException {
    File inputFile = null;
    FileInputStream fis = null;
    XSSFWorkbook workbook = null;
    XSSFSheet sheet = null;
    XSSFRow row = null;
    int lastRowNum = 0;
    int lastCellNum = 0;


    // Open the workbook
    inputFile = new File(fileName);
    fis = new FileInputStream(inputFile);
    workbook = new XSSFWorkbook(fis);
    sheet = workbook.getSheetAt(sheetIndex);
    lastRowNum = sheet.getLastRowNum();

    for (int i = 0; i < lastRowNum; i++) {

        row = sheet.getRow(i);
        if (row != null) {
            if (row.getLastCellNum() > lastCellNum) {
                lastCellNum = row.getLastCellNum();
            }
        }
    }

    return lastCellNum;
}

public int getNumberOfRow(String fileName, int sheetIndex) throws FileNotFoundException, IOException {
    File inputFile = null;
    FileInputStream fis = null;
    XSSFWorkbook workbook = null;
    XSSFSheet sheet = null;
    int lastRowNum = 0;

    // Open the workbook
    inputFile = new File(fileName);
    fis = new FileInputStream(inputFile);
    workbook = new XSSFWorkbook(fis);
    sheet = workbook.getSheetAt(sheetIndex);
    lastRowNum = sheet.getLastRowNum();
    return lastRowNum;
}

public String[] getSheetName(String fileName) throws FileNotFoundException, IOException {
    int totalsheet = 0;
    int i = 0;
    String[] sheetName = null;
    File inputFile = null;
    FileInputStream fis = null;
    XSSFWorkbook workbook = null;

    // Open the workbook
    inputFile = new File(fileName);
    fis = new FileInputStream(inputFile);
    workbook = new XSSFWorkbook(fis);
    totalsheet = workbook.getNumberOfSheets();
    sheetName = new String[totalsheet];
    while (i < totalsheet) {
        sheetName[i] = workbook.getSheetName(i);
        i++;
    }

    return sheetName;
}

public int getNumberOfSheet(String fileName) throws FileNotFoundException, IOException {
    int totalsheet = 0;
    File inputFile = null;
    FileInputStream fis = null;
    XSSFWorkbook workbook = null;
    XSSFSheet sheet = null;
    int lastRowNum = 0;

    // Open the workbook
    inputFile = new File(fileName);
    fis = new FileInputStream(inputFile);
    workbook = new XSSFWorkbook(fis);
    totalsheet = workbook.getNumberOfSheets();
    return totalsheet;
}

public String[][] getSheetData(String fileName, int sheetIndex) throws FileNotFoundException, IOException, InvalidFormatException {
    String[][] data = null;
    int i = 0;
    int j = 0;Cell cell=null;
    long emptyrowcount = 0;
    InputStream inputStream = new FileInputStream(
            fileName);
    // Create a workbook object.
    Workbook wb = WorkbookFactory.create(inputStream);
    wb.setMissingCellPolicy(Row.CREATE_NULL_AS_BLANK);
    Sheet sheet = wb.getSheetAt(sheetIndex);
    // Iterate over all the row and cells
    int noOfColumns = getNumberOfColumn(fileName, sheetIndex);
    System.out.println("noOfColumns::" + noOfColumns);
    int noOfRows = getNumberOfRow(fileName, sheetIndex) + 1;
    System.out.println("noOfRows::" + noOfRows);
    data = new String[noOfRows][noOfColumns];

    for (int k = 0; k < noOfRows; k++) {
        Row row = sheet.getRow(k);
        if (row == null) {


        } else {
            j = 0;
            for (int l = 0; l < noOfColumns; l++) {
                // Cell cell = cit.next();
                cell = row.getCell(j);


                if (cell.getCellType() == cell.CELL_TYPE_BLANK) {
                    cell = row.getCell(j, Row.CREATE_NULL_AS_BLANK);
                }

                data[i][j] = getCellValueAsString(cell);
                j++;

            }
            i++;

        }
    }

    return data;
}

/**
 * This method for the type of data in the cell, extracts the data and
 * returns it as a string.
 */
public static String getCellValueAsString(Cell cell) {
    String strCellValue = null;
    if (cell != null) {
        switch (cell.getCellType()) {
            case Cell.CELL_TYPE_STRING:
                strCellValue = cell.toString();
                break;
            case Cell.CELL_TYPE_NUMERIC:
                if (DateUtil.isCellDateFormatted(cell)) {
                    SimpleDateFormat dateFormat = new SimpleDateFormat(
                            "dd/MM/yyyy");
                    strCellValue = dateFormat.format(cell.getDateCellValue());
                } else {
                    Double value = cell.getNumericCellValue();
                    Long longValue = value.longValue();
                    strCellValue = new String(longValue.toString());
                }
                break;
            case Cell.CELL_TYPE_BOOLEAN:
                strCellValue = new String(new Boolean(
                        cell.getBooleanCellValue()).toString());
                break;
            case Cell.CELL_TYPE_BLANK:
                strCellValue = "";
                break;

        }
    }

    return strCellValue;
}

public static void main(String s[]) {
    try {
        readXLSXFile readXLSxFile = new readXLSXFile();
        String[][] sheetData = readXLSxFile.getSheetData("F:/work.xlsx", 0);

        int columnLength = 0;
        columnLength = readXLSxFile.getNumberOfColumn("F:/work.xlsx", 0);
        int rowLength = 0;
        rowLength = readXLSxFile.getNumberOfRow("F:/work.xlsx", 0);


        int h = 0;
        int j = 0;
        while (j < rowLength) {
            h = 0;
            while (h < columnLength) {
                System.out.print("\t     " + sheetData[j][h]);
                h++;
            }
            System.out.println("");
            j++;
        }

    } catch (InvalidFormatException ex) {
        Logger.getLogger(readXLSFile.class.getName()).log(Level.SEVERE, null, ex);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(readXLSFile.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(readXLSFile.class.getName()).log(Level.SEVERE, null, ex);


       }
    }
}

请帮助我如何处理Excel表中的空行?

2个回答

5
如果你获取了一行数据并返回了null,那么说明该行没有在文件中存储任何数据 - 它完全为空白。
POI默认提供文件中的内容。使用单元格时,您可以设置一个MissingCellPolicy来控制如何处理缺失和空单元格。在Apache POI文档中有一些使用示例。对于行,它们要么存在要么不存在,因此在获取行时需要检查是否为null。

您介意在这个话题上再详细解释一下吗?您如何实际使用“MissingCellPolicy”来防止那些“null” / “blank”行出现“NullPointerException”? - e.doroskevic
我已经在这里添加了一个链接,指向包含代码示例的POI文档。这是否满足您的需求? - Gagravarr
我已经阅读了POI文档,但没有找到代码示例。我会继续研究。经过进一步的调查,我已经能够回答我的问题了。感谢您的回复。 - e.doroskevic

1
如果您的 .xlsx 文件包含任何空单元格的格式,poi 读取不会将其视为 null,但是如果要打印其值,则会出现 NullPointerException。为了理解它,我创建了一个工作表,并将第一列边界标记为“所有边框”以进行 10 行,但没有给它任何值。现在应用以下代码片段,输出 sheet.lastRowNum() 显示为 10,而 RowCountWithNullValue 为 990,RowCountWithoutNullValue 为 10。但是该工作表完全为空。如果取消注释打印语句,则会显示 NullPointerException。
public class Rough {
public static void main(String args[]) throws IOException{
    public static void main(String args[]) throws IOException{
    FileInputStream fin = new FileInputStream(AddressOfxlsxFile);
    XSSFWorkbook wb = new XSSFWorkbook(fin);
    XSSFSheet sheet = wb.getSheetAt(1);
    int RowCountWithNullValue=0, RowCountWithoutNullValue=0;
    for (int i=0;i<1000;i++){
        if (sheet.getRow(i)==null)
            RowCountWithNullValue++;
        else{
            RowCountWithoutNullValue++;
        //  System.out.println(sheet.getRow(0).getCell(0));
        }
    }
    System.out.println(sheet.getLastRowNum());
    System.out.println(RowCountWithNullValue+","+RowCountWithoutNullValue);
  }
}

我不确定您的情况是否相同,但如果您说您的文件包含500行,但它显示为105667行,这可能是其中之一原因。

1
这不是一个新问题吗,而不是一个答案吗?另外,在else块中为什么总是调用sheet.getRow(0)而不是sheet.getRow(i) - Gagravarr

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接