#!/usr/bin/perl -w

use strict;

# Require exactly three arguments: input FASTA file, output file and k-mer
# length.  Problems are reported on STDERR with a non-zero exit status so a
# calling shell or pipeline can detect that nothing was processed.
if (@ARGV != 3) {
    print STDERR "usage: $0 fasta out length\n";
    exit 1;
}

# The length is used below as a substr() width and in loop arithmetic, so it
# has to be a positive integer; anything else would silently produce
# meaningless k-mers.
if ($ARGV[2] !~ /\A[0-9]+\z/ || $ARGV[2] < 1) {
    print STDERR "$0: length must be a positive integer, got '$ARGV[2]'\n";
    print STDERR "usage: $0 fasta out length\n";
    exit 1;
}

# Open the input and output with three-argument open so that the mode is
# never taken from the file name (a name starting with '>' or ending in '|'
# is then just a file name).  Two-argument open treated '-' as STDIN/STDOUT;
# that convention is kept here explicitly.  The bareword handles IN and OUT
# are retained because the rest of the script reads and writes through them.
if ($ARGV[0] eq '-') {
    open(IN, '<&', \*STDIN) or die "cannot open $ARGV[0]: $!\n";
}
else {
    open(IN, '<', $ARGV[0]) or die "cannot open $ARGV[0]: $!\n";
}

if ($ARGV[1] eq '-') {
    open(OUT, '>&', \*STDOUT) or die "cannot open $ARGV[1]: $!\n";
}
else {
    open(OUT, '>', $ARGV[1]) or die "cannot open $ARGV[1]: $!\n";
}


# %data maps each k-mer to a hash keyed by the number of every sequence it
# occurs in (value = occurrences within that sequence).  Sequence numbers are
# 1-based and follow the order of the '>' header lines; residues that appear
# before the first header are recorded under number 0.
my %data  = ();
my @names = ();    # header text (without the '>') of each record, in file order
my $seq   = '';    # residues accumulated for the record being read
my $count = 0;     # header lines seen so far, i.e. the current sequence number
my $len   = 0;     # total number of residues read over all records

# Index every k-mer of the buffered record under the current sequence number,
# add the record's length to the running total and clear the buffer.
# Called at each header line (for the record that just ended) and once more
# after the last line of input.
my $flush_record = sub {
    my $k = $ARGV[2];

    # A sequence of length L has L-k+1 k-mers, starting at offsets 0 .. L-k
    # inclusive.  The range is empty when the record is shorter than k.
    my $last_start = length($seq) - $k;
    for my $start (0 .. $last_start) {
        $data{ substr($seq, $start, $k) }{$count}++;
    }

    $len += length($seq);
    $seq = '';
};

while (my $line = <IN>) {
    chomp $line;
    $line =~ s/\r\z//;    # tolerate CRLF line endings

    # '.*' rather than '.+': a header line consisting of just '>' still
    # starts a new record instead of being appended to the sequence.
    if ($line =~ /^>(.*)/) {
        my $name = $1;
        $flush_record->();    # uses $count of the record that just ended
        push(@names, $name);
        $count++;
    }
    else {
        $seq .= $line;
    }
}
$flush_record->();            # the last record has no following header

# Progress summary on STDERR.
warn scalar @names, " sequences of $len bp total read\n";
warn scalar keys %data, " kmers identified\n";

# One output line per k-mer: the k-mer, the number of distinct sequences it
# occurs in, then the numbers of those sequences in ascending order, all
# tab-separated.  Lines are ordered by ascending sequence count; k-mers with
# equal counts are ordered alphabetically so the output is the same on every
# run instead of following the hash's internal order.
foreach my $kmer (
    sort { scalar(keys %{ $data{$a} }) <=> scalar(keys %{ $data{$b} }) or $a cmp $b }
    keys %data
) {
    my @seq_ids = sort { $a <=> $b } keys %{ $data{$kmer} };
    print OUT join("\t", $kmer, scalar @seq_ids, @seq_ids), "\n"
        or die "cannot write to $ARGV[1]: $!\n";
}

# Buffered write errors (e.g. a full disk) may only surface when the handle
# is closed, so the result of close has to be checked.
close(OUT) or die "cannot close $ARGV[1]: $!\n";
close(IN);