Phil Lawrence on 23 Jan 2004 17:13:46 -0000 |
Mark Dominus wrote: > [lotso good stuff] Thanks for the details. I especially found the code sample and this explanation *very* helpful: > Here if the input is already a file, we use the file it's in. This > handles all three cases: > > pivot_dump | col_width # data is copied to /tmp/cw12345 > col_width foo # data is not copied > col_width < foo # data is not copied > > The second case has $input_file defined, and the third case has > $input_file undefined but -f STDIN true. I did end up going with a tempfile solution, though by using the above explanation I see I can smarten it up a bit... I think this will be the end goal: 1. if rewindable, just do that 2. else buffer up to a certain memory usage (default or passed in value), and then switch to tempfile solution #2 will take some research, I don't yet know if I can/how to check memory consumption of my @slurp variable on the fly. Anyway, attached for the list's enjoyment is my script for formatting delimited data. Works pretty nice. prl #!/usr/local/bin/perl use warnings; use diagnostics; use strict; use Getopt::Long; use File::Temp qw/ tempfile /; ### # parm defaults and validation ### my $in_delim = "\t"; my $out_delim; my $fixed_width; my $var_width; my %out_col_format = (); my $out_spacer; GetOptions ( "in_delimiter|delimiter|d=s" => \$in_delim, "out_delimiter|od=s" => \$out_delim, "fixed_width|fw:i" => \$fixed_width, # standalone flag, or can take a max width "variable_width|var_width|vw:i" => \$var_width, # standalone flag, or can take a max width "out_column_format|out_cf|ocf=s" => \%out_col_format, "out_spacer|out_s|os=s" => \$out_spacer, ); die "Specified both --fixed_width and --variable_width" if defined $fixed_width and defined $var_width; die "--out_column_format keys may be integers only" if grep /\D/, keys %out_col_format; my $qr_in_delim = qr/$in_delim/; ### # main logic ### if (!defined $fixed_width and !defined $var_width) { while (<>) { print format_line( line => $_ ,qr_in_delim => $qr_in_delim ,( defined $out_delim ? (out_delim => $out_delim) : () ) ,( keys %out_col_format ? (out_col_format => \%out_col_format) : () ) ,( defined $out_spacer ? (out_spacer => $out_spacer) : () ) ); } exit; } # Create tempfile which will be auto-deleted when program exits my $fh = tempfile(UNLINK => 1); my ($max_width, $in_max_width); if (defined $fixed_width) { if ($fixed_width > 0) { $max_width = $fixed_width; } else { $in_max_width = 0; } } if (defined $var_width) { if ($var_width > 0) { $max_width = $var_width; } $in_max_width = []; } for (<>) { print $fh $_; # if we need to collect max width info... if (defined $in_max_width) { chomp; if (ref $in_max_width) # we're looking for the widths of the widest field for *each* column { my @in = map { my $idx = -1; if (defined $out_col_format{++$idx}) { $_ = sprintf $out_col_format{$idx}, $_; } length; } split $qr_in_delim; for (0 .. $#in) { $in_max_width->[$_] = $in[$_] if !defined $in_max_width->[$_] or $in[$_] > $in_max_width->[$_]; } } else # we're looking for max width of all the fields { $in_max_width = ( sort { $b <=> $a } $in_max_width ,( sort { $b <=> $a } map { length } split $qr_in_delim )[0] )[0]; } } } my $out_col_width; if (defined $fixed_width) { $out_col_width = ( defined $max_width and $max_width < $in_max_width ) ? $max_width : $in_max_width; } if (defined $var_width) { $out_col_width = defined $max_width ? [ map { $max_width < $_ ? $max_width : $_ } @$in_max_width ] : $in_max_width; } seek $fh,0,0; while (<$fh>) { print format_line( line => $_ ,qr_in_delim => $qr_in_delim ,out_col_width => $out_col_width ,( defined $out_delim ? (out_delim => $out_delim) : () ) ,( keys %out_col_format ? (out_col_format => \%out_col_format) : () ) ,( defined $out_spacer ? (out_spacer => $out_spacer) : () ) ); } ### # subroutines ### sub format_line { my %parms = @_; die "No line parm was passed" unless defined $parms{line}; die "No qr_in_delim parm was passed" unless defined $parms{qr_in_delim}; # if other args were passed in if (keys %parms > 2) { # split line into fields chomp $parms{line}; my @fields = split /$parms{qr_in_delim}/, $parms{line}; # apply formatting to fields as needed $fields[$_] = sprintf($parms{out_col_format}->{$_}, $fields[$_]) for keys %{ $parms{out_col_format} }; # set col width if specified if (defined $parms{out_col_width}) { if (ref $parms{out_col_width}) { $fields[$_] = defined $fields[$_] ? sprintf ( '%' . $parms{out_col_width}->[$_] . 's' ,( length $fields[$_] > $parms{out_col_width}->[$_] ? unpack 'A' . $parms{out_col_width}->[$_], $fields[$_] : $fields[$_] ) ) : ' ' x $parms{out_col_width}->[$_] for (0 .. $#{$parms{out_col_width}}); } else { $fields[$_] = sprintf('%' . $parms{out_col_width} . 's' , $fields[$_]) for (0 .. $#fields); } } # reassemble the line $parms{line} = join ( $parms{out_delim} .( defined $parms{out_spacer} ? $parms{out_spacer} : '' ) ,@fields ) ."\n"; } return $parms{line}; }
|
|