如何在Perl中摆脱强制文件打开?

5
open( my $handle, '<', 'file.dat' ) or die $!;
my @data = map { do_things($_) } <$handle>;
close $handle;

这个命令式的 openclose 在整个代码中显得很突兀。有没有更简洁的写法?我可以自己编写 read_file 的子程序,但应该已经有类似的东西了。

sub read_file {
    open( my $handle, '<', $_[0] ) or croak $!;
    return <$handle>;
}
my @data = map { do_things($_) } read_file('file.dat');

效率并不重要,但解决方案应该是跨平台的。


1
那段代码有一个漏洞:你忘记检查 close 的返回值了。 - tchrist
@tchrist:谢谢。我从来没有想过close可能会失败。 - Tim
3个回答

13

使用File::Slurp来处理文件,它是免费的。

use File::Slurp;
my @data = map {...} read_file($filename);

3

每个人都会写自己的这个。至少我的默认值是正确的。

#############################################################
#  File::Clowder - a herd of obedient cats
#
#   Tom Christiansen <tchrist@perl.com>
#   Sat May 28 09:17:32 MDT 2011
#############################################################
##
## ** THIS IS AN UNSUPPORTED, PRE-RELEASE VERSION ONLY **
##
#############################################################

package File::Clowder; 

use v5.10.1;
use strict;
use warnings;
use Carp;

#############################################################

use parent "Exporter";

our $VERSION    = v0.0.1;
our @EXPORT     = qw<cat>;
our @EXPORT_OK  = qw[
    cat         catfile         catfiles
    catascii    catlatin        piglatin
    rawfile     catbytes        file_bytes      
    file_string file_line       file_lines
    file_paras  file_records
    utf8_file   decode_file
];
our %EXPORT_TAGS = ( 
    all => [ @EXPORT, @EXPORT_OK ],
);

#############################################################

sub  cat            ( @         );
sub  catfiles       ( @         );
sub  catbytes       ( _         );
sub  rawfile        ( _         );
sub  catascii       ( _   ; $   );
sub  catfile        ( _   ; $   );
sub  catlatin       ( _   ; $   );
sub  piglatin       ( _   ; $   );

sub  file_bytes     ( $         );
sub  file_line      ( $         );
sub  file_lines     ( $         );
sub  file_paras     ( $         );
sub  file_records   ( $ $       );
sub  file_string    ( $         );

sub  utf8_file      ( $   ; $   );
sub  decode_file    ( $ $ ; $   );

sub _contents       ( $   ; $   );
sub  choke          ( $ @       );

our $_ENCODING;

#############################################################

sub choke($@) {
    my $func = (caller(1))[3]; 
    my $args = join q() => @_;
    local $Carp::CarpLevel = 2 unless our $DEBUG;
    confess "$func(): $args";
} 

sub catfiles(@) {
    my $many = wantarray();
    if ($many) {
        return map {catfile} @_;
    } 
    elsif (defined $many) {
        return join q() => map { scalar catfile } @_;
    } 
    else {
        catfile for @_;
    } 
    return scalar @_;
} 

BEGIN { *cat = \&catfiles }

sub catfile(_;$) {
    @_ == 1 || @_ == 2          || choke q<usage: [data =] catfile($;$)>;
    if (defined wantarray())    {  return  &utf8_file  } 
    else                        {  say for &utf8_file  }
} 

sub catascii(_;$) {
    @_ == 1 || @_ == 2          || choke q<usage: [data =] catascii($;$)>;
    if (defined wantarray())    {  return  &decode_file("US-ASCII", @_) }
    else                        {  say for &decode_file("US-ASCII", @_) }
} 

sub catlatin(_;$) {
    @_ == 1 || @_ == 2          || choke q<usage: [data =] catlatin($;$)>;
    if (defined wantarray())    {  return  &decode_file("ISO-8859-1", @_) }
    else                        {  say for &decode_file("ISO-8859-1", @_) }
} 

sub piglatin(_;$) {
    @_ == 1 || @_ == 2          || choke q<usage: [data =] piglatin($;$)>;
    if (defined wantarray())    {  return  &decode_file("CP1252", @_) }
    else                        {  say for &decode_file("CP1252", @_) }
} 

sub file_bytes($) {
    !wantarray()                || choke q<call me in scalar context>;
    @_ == 1                     || choke q<usage: $data = file_bytes($)>;
    local $_ENCODING;
    return scalar _contents($_[0], undef);
} 

sub rawfile(_) {
    @_ == 1                     || choke q<usage: $data = rawfile($)>;
    my $data = &file_bytes;
    return $data;
} 

BEGIN { *catbytes = \&rawfile }

sub file_line($) {
    @_ == 1                     || choke q<usage: @lines = file_lines($)>;
    return utf8_file($_[0], qr/\R/);
} 

sub file_lines($) {
    wantarray()                 || choke q<call me in list context>;
    @_ == 1                     || choke q<usage: @lines = file_lines($)>;
    return utf8_file($_[0], qr/\R/);
} 

sub file_paras($) {
    wantarray()                 || choke q<call me in list context>;
    @_ == 1                     || choke q<usage: @paras = file_paras($)>;
    return utf8_file($_[0], qr/\R+/);
} 

sub file_records($$) {
    wantarray()                 || choke q<call me in list context>;
    @_ == 2                     || choke q<usage: @recs = file_records($$)>;
    return &utf8_file;
} 

sub file_string($) {
    !wantarray()                || choke q<call me in scalar context>;
    @_ == 1                     || choke q<usage: $data = file_string($)>;
    return scalar utf8_file($_[0], undef);
}

sub utf8_file($;$) {
    @_ == 1 || @_ == 2          || choke q<usage: data = utf8_file($;$)>;
    return &decode_file("UTF-8", @_);
} 

sub decode_file($$;$) {
    @_ == 2 || @_ == 3          || choke q<usage: data = decode_file($$;$)>;
    local $_ENCODING = shift();
    return &_contents;
} 

sub _contents($;$) {
    my $many = wantarray()      // choke "don't call me in void context";
    @_ == 1 || @_ == 2          || choke q<usage: data = _contents($;$)>;

    my ( $fname,  $eol ) = 
       (  shift(),     );

    if (@_) {
        $eol = shift();
        $eol = qr/\R+/ if grep {defined && !length} $eol;
    } else {
        $eol = qr/\R/;
    } 

    $fname !~ / ^ \s* \+?  > /x || choke "'$fname' looks like output file";
    $fname !~ / ^ \s*  -? \| /x || choke "'$fname' looks like output pipe";
    open(my $fh, $fname)        || choke "can't open '$fname': $!";

    my $enc = $_ENCODING 
                ? ":encoding($_ENCODING)"
                : ":raw"
            ;

    binmode($fh, $enc)          || choke "can't binmode('$fname','$enc'): $!";

    my $data = do { 
        local $/ = undef;
        use warnings FATAL => "all";
        <$fh>;
    };

    my $piping = ($fname =~ / \| \s* \z /x );
    $! = 0;
    close($fh)                  || choke "can't close '$fname': " 
                                      . ($piping 
                                        ? qq<\$?=$? > 
                                        : qq<>
                                    ) . $!;
    unless ($many) {
        $data =~ s/ $eol \z //x if defined $eol;
        return $data; 
    } 

    my @data = split($eol // qr{\R}, $data);
    pop(@data) if @data && !length( $data[-1] );

    return @data;
} 

'ig00' ; __END__ #

谢谢。我想'ig00'只是返回1的愚蠢方式,但是以__END__ + 注释明确结束的目的是什么? - Tim
1
@Tim,它确保您永远不会添加更多的代码。我喜欢我的这些猫咪函数的版本,因为它们具有正确的默认值,包括但不限于在意外指向某些遗留的8位编码时具有正确的故障转移行为。而'ig00'是一个特殊字符串,由于其特殊性质,免受无用的常量警告;同样的原因也适用于'di'。 - tchrist

1

更加清洁,但代价是产生一个新的进程:

my @data = map { do_things($_) } split "\n", `cat file.dat`;

确实更简洁。生成新进程没问题,但我需要一些跨平台的东西。 - Tim

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接