#!/usr/bin/perl use warnings; use strict; $|=1; use Sys::Hostname; my $hostname = Sys::Hostname->hostname(); $hostname =~ s/\..*$//; my $spool = "spool_shaf"; my $readchunk = 1024*1024; my $opt_s = 0; # seek to my $opt_j = 0; # dump junk my $opt_n = 0; # check n2n my $opt_m = "1g"; # max-read my $opt_S = 0; # sleep after each load, in fractional seconds use Getopt::Long qw(:config no_ignore_case bundling); GetOptions( 's=i' => \$opt_s, 'S=s' => \$opt_S, 'm=s' => \$opt_m, 'j' => \$opt_j, 'n' => \$opt_n, ) or die "funnyargs"; if ($opt_m =~ /^(\d+)([kmgt])$/i) { my ($n, $s,) = ($1, lc $2,); $opt_m = $n*{ k => 2**10, m => 2**20, g => 2**30, t => 2**40,}->{$s}; } elsif ($opt_m =~ /^(\d+)$/) { $opt_m = $1; } else { die "bad max: $opt_m"; } die "no $spool" unless -d $spool; my ($dbh, $sth,); if ($opt_n) { require TMA::LootDb; $dbh = TMA::LootDb::get_dbh(); die "no dbh" unless $dbh; $sth = $dbh->prepare_cached(qq{ select lid, size from n2n where sha1 = ? }) or die $dbh->errstr; } my ($buf, $offs,); for my $f (@ARGV) { my $if; open $if, "<", $f or die "cant open($f): $!"; printf "OPEN(%s): done\n", $f; if ($opt_s) { seek($if, $opt_s, 0) or die "cant seek: $!"; } $offs = $opt_s; my $junkdir = $f.".junk"; if ($opt_j) { unless (-d $junkdir) { mkdir $junkdir or die "cant create $junkdir: $!"; } die "no junkdir" unless -d $junkdir; } $buf = ""; &load_file($f, $if, $junkdir); close $if; } exit 0; sub load_file ($$$) { my ($fn, $if, $jd,) = @_; my $eof = 0; my $go = 1; while ($go) { printf "\nOFFS(%s): ", $offs; if (length($buf) < $readchunk) { $eof = (&read_some($if) ? 0 : 1); } if ($buf =~ /^(.*?)SHAD(.{20})(.{4})/s) { my ($junk, $sizebin, $hashbin,) = ($1, $3, $2,); if (length($junk)) { printf "%i junk, ", length($junk); &junk($junk, $jd, 1); } my $size = unpack "N", $sizebin; printf "SHAD %u size, ", $size; my $skip = length($junk)+4; if ($size > 100*$readchunk) { printf "skipping %i as oversized", $skip; substr $buf, 0, $skip, ""; $offs += $skip; &junk("SHAD", $jd, 0); next; } my $headlen = length($junk)+4+4+20; while (length($buf) < ($headlen+$size) && &read_some($if)) { printf "reading "; } my $data = substr $buf, $headlen, $size; die "unexpected size" unless length($data) == $size; use Digest::SHA; my $s1 = Digest::SHA->new(); $s1->add($data); my $sha1 = $s1->digest(); undef $s1; unless ($sha1 eq $hashbin) { printf "skipping %i as hashmismatched", $skip; substr $buf, 0, $skip, ""; $offs += $skip; &junk("SHAD", $jd, 0); next; } use MIME::Base32 qw( RFC ); my $urn = "urn:sha1:". lc MIME::Base32::encode($sha1); printf "good %s ", $urn; my $d = { sha1 => $sha1, size => $size, data => $data, nourl => "YES", }; if ($opt_n) { $sth->execute($sha1) or die $sth->errstr; my $ar = $sth->fetchall_arrayref(); $sth->finish(); if (defined $ar && @$ar) { die "multi ".join(",",map({$_->[0]} @$ar)) if scalar @$ar > 1; my ($l, $s,) = @{$ar->[0]}; die "size mismatch" unless $s == $size; printf "have lid:%i", $l; $d = undef; } } if (defined $d) { &spool($d); } if ($opt_S) { select undef, undef, undef, $opt_S; } substr $buf, 0, $headlen+$size, ""; $offs += $headlen+$size; next; } elsif ($eof && length($buf) <= 24) { &junk($buf, $jd, 1); printf " eof!\n"; $go = 0; last; } else { my $skip = length($buf)-24; printf "skipping %i as nonmatched", $skip; &junk(substr($buf, 0, -24), $jd, 0); $buf = substr $buf, -24; $offs += $skip; $eof = (&read_some($if) ? 0 : 1); next; } } } sub read_some ($) { my ($if,) = @_; if (length($buf) > 100*$readchunk) { die "max buffer size exceeded"; } if ($opt_m > 0 && ($offs-$opt_s) > $opt_m) { die "reached max-read"; } return sysread $if, $buf, $readchunk, length($buf); } #### my $junkfile; sub junk ($$$) { return unless $opt_j; my ($data, $jd, $done,) = @_; unless ($junkfile) { my $jf = $jd."/".$offs.".junk"; unlink $jf if -e $jf; die "exists $jf" if -e $jf; open $junkfile, ">", $jf or die "cant open $jf: $!"; } print $junkfile $data; if ($done) { close $junkfile or die "cant close JF: $!"; undef $junkfile; } } my $idcnt; sub spool { my $d = shift; die "not ref" unless ref $d; if (ref $d) { require Storable; $d = Storable::nfreeze($d); } use Digest::SHA qw/ sha1 /; my $h = sha1($d); my $D = $h.$d; my $now = time; die "no hostname" unless $hostname; $idcnt++; my $id = sprintf "%i.%s.%i.%i",$now,$hostname,$$,$idcnt; my $pref = sprintf "%03i", (($now+$idcnt) % 1000); mkdir $spool unless -d $spool; die "no spool: ".$spool unless -d $spool; mkdir $spool."/".$pref unless -d $spool."/".$pref; die "no spoolpref: ".$spool."/".$pref unless -d $spool."/".$pref; my $fn = sprintf "%s/%s/%s", $spool, $pref, $id; my $tn = sprintf "%s/%s/.%s.tmp", $spool, $pref, $id; die "file exists: $fn" if -e $fn; die "file exists: $tn" if -e $tn; open TF, ">", $tn or die "cant open $tn: $!"; print TF $D; close TF or die "cant close $tn: $!"; rename $tn, $fn or die "cant rename($tn,$fn): $!"; die "still present $tn" if -e $tn; die "not present $fn" unless -e $fn; use File::stat; my $s = stat($fn); die "stat($fn) failed: $!" unless $s; die "size mismatch" unless $s->size == length($D); # if ($readback) { # open RF, "<", $fn or die "cant readopen($fn): $!"; # local $/; # my $d = ; # close RF; # die "data mismatch" unless $d eq $D; # } printf " spooled"; return $fn; }