#!/usr/bin/env perl
#
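# summarize.pl - summarize by day: count records per date and valid item type
#
# Usage: summarize.pl ckidateIndex itemtypeIndex < filtered.txt > summarized.txt
#
#   ckidateIndex and itemtypeIndex are 1-based column numbers in the
#   comma-separated input, which is assumed to be sorted by the ckidate column.
#   Output lines have the form: ckidate,itemtype,count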
# 
# Darren Hardy, (c) January 2009
#

# Command-line arguments: the 1-based column numbers (within each
# comma-separated input line) of the date field and the item-type field.
my $idx_ckidate = shift;
my $idx_itemtype = shift;

# Both arguments are mandatory and must be positive integers. Matching a
# digits-only pattern before the numeric test rejects missing arguments and
# values such as "2x" or "1.5", which a bare "> 0" comparison would silently
# coerce into some column number.
for my $idx ($idx_ckidate, $idx_itemtype) {
	die "Usage: summarize.pl ckidateIndex itemtypeIndex < filtered.txt > summarized.txt\n"
		unless defined $idx and $idx =~ /^\d+\z/ and $idx > 0;
}

# Build the set of recognised item types from the list that follows
# __DATA__ (one item-type code per line).
my %validItemTypes = ();
while (my $type = <DATA>) {
	# chomp removes only a trailing newline; chop would also eat the last
	# real character of a final line that has no newline.
	chomp $type;
	# Ignore blank lines so the empty string never becomes a "valid" type
	# (which would make rows with a missing item type get counted).
	next unless length $type;
	$validItemTypes{$type} = 1;
}

# Tally for the day currently being processed: item type => record count.
# %counts and $current_dt are file-scoped because writeRecord() reads them.
my %counts = ();
my $current_dt = '';
while (my $line = <STDIN>) {
	next if $line =~ /^#/;	# skip comment lines
	# chomp (not chop): a final line without a trailing newline must keep
	# its last character, otherwise its last field is silently corrupted.
	chomp $line;
	my @data = split(/,/, $line);
	my $ckidate = $data[$idx_ckidate-1];
	my $itemtype = $data[$idx_itemtype-1];

	# Skip rows that are too short or whose date field is not YYYY-MM-DD.
	next unless defined $ckidate and $ckidate =~ /^\d\d\d\d-\d\d-\d\d$/;

	# assumes that flat file is sorted by ckidate
	if ($current_dt ne $ckidate) {
		writeRecord();				# flush the finished day (prints nothing before the first day)
		%counts = (); 				# reset counts
		$current_dt = $ckidate; 	# move window
		print STDERR "processing ", $ckidate, "\n";
	}

	# increment counter map for legal item types; ++ treats a missing
	# entry as 0, so no explicit initialisation is needed
	$counts{$itemtype}++ if defined $itemtype and $validItemTypes{$itemtype};
}

# Flush the counts accumulated for the last day in the input.
writeRecord();
exit(0);

# Write one "date,itemtype,count" line to STDOUT for every item type tallied
# in %counts, using the date held in $current_dt. Lines are emitted in sorted
# item-type order; nothing is printed when %counts is empty.
sub writeRecord {
	for my $type (sort keys %counts) {
		my $row = join(',', $current_dt, $type, $counts{$type});
		print STDOUT $row, "\n";
	}
}

__DATA__
acart
acbk
accas
accd
accdrom
acdisk
acdvd
acfold
ackit
acmap
acmus
acpam
acper
acphoto
acpost
acrec
acslide
acvhs
acvid
alvhs
arbk
arcas
arcd
arcdrom
ardvd
arkit
armap
armfc
armfm
armus
arnp
arper
arslide
arvhs
arweb
bcbk
bccas
bccd
bccdrom
bcdvd
bckit
bcvhs
blvhs
jcbk
jccas
jccd
jccdrom
jcdvd
jckit
jcmus
jcrec
jcvhs
jlvhs
jrbk
jrcd
jrkit
jrmus
jrrec
jrvhs
