Apache Commons CSV ➙ 百万行12秒
有没有现成的库可以帮助我加速处理?
是的,Apache Commons CSV 项目在我的经验中非常有效。
这里有一个使用 Apache Commons CSV 库写入和读取24列行的示例应用程序:一个整数顺序号,一个 Instant
,其余都是随机的 UUID
对象。
对于10,000行,写入和读取各需要约半秒钟。读取包括重构 Integer
、Instant
和 UUID
对象。
我的示例代码允许您切换对象的重组。我使用 MacBook Pro(Retina,15 英寸,2013 年底)上的 Java 12,2.3 GHz Intel Core i7,16 GB 1600 MHz DDR3,Apple 内置 SSD 运行了一百万行。这将创建一个 850 兆字节的文件。
对于一百万行,读取需要十秒钟,解析需要两秒钟:
- 写入:PT25.994816S
- 仅读取:PT10.353912S
- 读取和解析:PT12.219364S
源代码是单个 .java
文件。它有一个写入方法和一个 read
方法。这两种方法都从一个 main
方法中调用。
我通过调用
Files.newBufferedReader
打开了一个
BufferedReader
。
package work.basil.example;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Duration;
import java.time.Instant;
import java.util.UUID;
public class CsvReadingWritingDemo
{
public static void main ( String[] args )
{
CsvReadingWritingDemo app = new CsvReadingWritingDemo();
app.write();
app.read();
}
private void write ()
{
Instant start = Instant.now();
int limit = 1_000_000;
Path path = Paths.get( "/Users/basilbourque/IdeaProjects/Demo/csv.txt" );
try (
Writer writer = Files.newBufferedWriter( path, StandardCharsets.UTF_8 );
CSVPrinter printer = new CSVPrinter( writer , CSVFormat.RFC4180 );
)
{
printer.printRecord( "id" , "instant" , "uuid_01" , "uuid_02" , "uuid_03" , "uuid_04" , "uuid_05" , "uuid_06" , "uuid_07" , "uuid_08" , "uuid_09" , "uuid_10" , "uuid_11" , "uuid_12" , "uuid_13" , "uuid_14" , "uuid_15" , "uuid_16" , "uuid_17" , "uuid_18" , "uuid_19" , "uuid_20" , "uuid_21" , "uuid_22" );
for ( int i = 1 ; i <= limit ; i++ )
{
printer.printRecord( i , Instant.now() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() , UUID.randomUUID() );
}
} catch ( IOException ex )
{
ex.printStackTrace();
}
Instant stop = Instant.now();
Duration d = Duration.between( start , stop );
System.out.println( "Wrote CSV for limit: " + limit );
System.out.println( "Elapsed: " + d );
}
private void read ()
{
Instant start = Instant.now();
int count = 0;
Path path = Paths.get( "/Users/basilbourque/IdeaProjects/Demo/csv.txt" );
try (
Reader reader = Files.newBufferedReader( path , StandardCharsets.UTF_8) ;
)
{
CSVFormat format = CSVFormat.RFC4180.withFirstRecordAsHeader();
CSVParser parser = CSVParser.parse( reader , format );
for ( CSVRecord csvRecord : parser )
{
if ( true )
{
Integer id = Integer.valueOf( csvRecord.get( 0 ) );
Instant instant = Instant.parse( csvRecord.get( 1 ) );
for ( int i = 3 - 1 ; i <= 22 - 1 ; i++ )
{
UUID uuid = UUID.fromString( csvRecord.get( i ) );
}
}
count++;
if ( count % 1_000 == 0 )
{
}
}
} catch ( IOException e )
{
e.printStackTrace();
}
Instant stop = Instant.now();
Duration d = Duration.between( start , stop );
System.out.println( "Read CSV for count: " + count );
System.out.println( "Elapsed: " + d );
}
}