#!/usr/bin/env perl

use Time::Local;

# Table of the group types this decoder knows how to write.
# NOTE that the key of each entry also determines the ORDER in which the
# groups will be written!!  The key of an entry is simply its position in
# the list below (the first entry gets key 0, the next key 1, and so on).
my %known_group_types = do
{
  my @types_in_output_order =
  (
    'INVALID',
    'OZONE:POINT:SAMPLE:OBSERVED:60:0:24:60:PPB',
    'OZONE:POINT:SAMPLE:OBSERVED:60:0:24:480:PPB',
    'OZONE:POINT:SAMPLE:DERIVED:60:-240:24:480:PPB',
    'OZONE:POINT:PEAK:OBSERVED:1440:0:1:60:PPB',
    'OZONE:POINT:PEAK:OBSERVED:1440:0:1:480:PPB',
    'OZONE-AQI:POINT:SAMPLE:DERIVED:60:-240:24:480:AQI',
  );

  map { $_ => $types_in_output_order[$_] } 0 .. $#types_in_output_order;
};

################################################################################
################################################################################
# ozone.pl - turn ozone.obs files in oms format 2 into csv for awips
#
# assumptions made by this decoder
#
# * iff a station appears in a group, it appears in every group
# * iff a group has qc for a station, it will have qc for every station
################################################################################
################################################################################

################################################################################
# turn on a bunch of stuff for debugging purposes
# you may comment these out if you choose - for speed
################################################################################
#use warnings;
#use strict;
#use diagnostics;


################################################################################
# a list of all known group types
# you may add new entries by composing a ':' delimited string from all the
# UNIQUE fields of an ozone GROUP, namely
#
# VARIABLE	    -> UNIQUE
# DATA_TYPE	    -> UNIQUE
# MEASUREMENT_TYPE  -> UNIQUE
# CHARACTERISTIC    -> UNIQUE
# START_DTG	    -> NOT UNIQUE
# END_DTG	    -> NOT UNIQUE
# INTERVAL	    -> UNIQUE
# START_REF	    -> UNIQUE
# NUMSTEPS	    -> UNIQUE
# AVG_TIME	    -> UNIQUE
# UNITS		    -> UNIQUE
# STATIONS	    -> NOT UNIQUE
#
# NOTE : START_DTG, END_DTG, and STATIONS **ARE NOT UNIQUE**
#        so do not include them in the entry!!
#
# entry = 
# <VARIABLE>:<DATA_TYPE>:<MEASUREMENT_TYPE>:<CHARACTERISTIC>:
# <INTERVAL>:<START_REF>:<NUMSTEPS>:<AVG_TIME>:<UNITS>
#
# SEE %known_group_types AT THE TOP OF THIS FILE FOR EXAMPLES.
#
################################################################################


################################################################################
################################################################################
# DECLARATIONS AND CONSTANTS
################################################################################
################################################################################

# File-scoped working variables.  They are file lexicals, so the parser
# below and the subs of the Ozone::* packages further down all see them.

# diagnostics switch, and the hour that selects the column of 24-step groups
my $verbose = 0;	# set this to 1 for REAMS of output!!
my $hour    = 0;

# time bookkeeping for DATA_VERSION / TZONE handling
my ($timestamp, $data_version_time, $utc, $tzone_offest);
my ($yyyy, $mm, $dd, $hh, $nn, $ss);

# parser bookkeeping: current line, current state, current file and group
my ($line, $state, $file, $group);
my ($n_stations, $key, $val);

# scratch variable and output destination
my ($foo, $output_filename, $out);

# states the parser can be in
use constant {
  INITIAL => 0,
  FILE    => 1,
  GROUP   => 2,
  DATA    => 3,
};

# patterns used by the parser
#   $COMMENT : a line whose first non-blank character is '!'
#   $KEYVAL  : "key, value" with surrounding blanks trimmed ($1 = key, $2 = value)
my $COMMENT      = qr/^\s*!/o;
my $KEYVAL       = qr/^\s*([^,\s](?:[^,]*[^,\s])*),\s*([^,\s](?:.*[^,\s])*)\s*$/;
my $DATA_VERSION = qr/^\s*DATA_VERSION\s*(\d)+\s*$/;
my $BEGIN_FILE   = qr/BEGIN_FILE/o;
my $BEGIN_GROUP  = qr/BEGIN_GROUP/o;
my $BEGIN_DATA   = qr/BEGIN_DATA/o;
my $END_FILE     = qr/END_FILE/o;
my $END_GROUP    = qr/END_GROUP/o;
my $END_DATA     = qr/END_DATA/o;

# global list of file objects
my @files = ();

# stack of states, begin in the INITIAL state
my @states = (INITIAL);



################################################################################
# let's try to guess the output filename
################################################################################
# The first command line argument is the seed for the output name; without
# it there is no input to read either.
$output_filename = $ARGV[0] or die "NO INPUT FILE!\n";

# try to build an output name based on input name
# this is awips specific code - not needed
#   .../tmp/<name>       becomes  .../Raw/<name>
#   .../<name>.<digits>  becomes  .../sonoma.<digits>
$output_filename =~ s{tmp/([^/]+)$}{Raw/$1};
$output_filename =~ s{/[^/]+\.(\d+)$}{/sonoma.$1};

# A name that neither rewrite touched would make us overwrite the input
# file, so only open a file when the name really changed.
if ($output_filename ne $ARGV[0])
{
  # three-argument open: the name is never parsed for a mode or a pipe
  open($out, '>', $output_filename) or
    die "CANNOT OPEN OUTPUT FILE $output_filename: $!\n";
}
# oh well, just use stdout
else
{
  undef $output_filename;
  $out = *STDOUT;
}



################################################################################
# PARSE MADNESS!!!!
################################################################################

# parse all files named on command line
# Line oriented state machine over every file named on the command line.
# The top of @states is the current state: BEGIN_* markers push a state,
# END_* markers pop it.  Comment lines (leading '!') are skipped everywhere.
#
#   INITIAL : wait for BEGIN_FILE, then start a new Ozone::File
#   FILE    : "key, value" lines become attributes of the file; DATA_VERSION
#             and TZONE additionally set $hour and the record timestamp
#   GROUP   : "key, value" lines become attributes of the current group
#   DATA    : every non-blank line is one comma separated table row
while ($line = <>)
{
  next if $line =~ $COMMENT;

  $state = $states[-1];

  # the states are integer constants, so compare them numerically
  if ($state == INITIAL)
  {
    if ($line =~ $BEGIN_FILE)
    {
      $file = Ozone::File->new ();
      push @files, $file;
      push @states, FILE;
    }
  }
  elsif ($state == FILE)
  {
    if ($line =~ $END_FILE)
    {
      pop @states;
    }
    elsif ($line =~ $BEGIN_GROUP)
    {
      # remember the new group immediately, so its data rows can never end
      # up in an earlier group even if it has no "key, value" lines at all
      $group = Ozone::Group->new ();
      $file->push_group ($group);
      push @states, GROUP;
    }
    elsif ($line =~ $KEYVAL)
    {
      $file->{$1} = $2;

      if (index($line, 'DATA_VERSION') != -1)
      {
        # the trailing run of digits is read as YYYYMMDD[HH[MM]]
        my ($stamp) = $line =~ /(\d+)\s*$/;
        my ($year, $month, $day, $hr, $min) = defined $stamp
          ? $stamp =~ /^(\d{4})(\d{2})(\d{2})(\d{0,2})(\d{0,2})/
          : ();
        die "MALFORMED DATA_VERSION (WANT YYYYMMDD[HH[MM]]): $line"
          unless defined $day;

        ($yyyy, $mm, $dd) = ($year, $month, $day);
        $hh = length $hr  ? $hr  : 0;
        $nn = length $min ? $min : 0;

        # timegm() expects a 0-based month (0 = January)
        $data_version_time =
          Time::Local::timegm (0, $nn, $hh, $dd, $mm - 1, $yyyy);
        $hour = $hh;
      }
      elsif (index($line, 'TZONE') != -1)
      {
        # Only the trailing digits are used: the offset is always ADDED to
        # the DATA_VERSION time, and a value without digits counts as 0.
        # NOTE(review): a leading '-' is not captured - confirm that the
        # sign convention of TZONE really makes "add the magnitude" right.
        # NOTE(review): assumes DATA_VERSION was seen earlier in this file.
        $tzone_offest = ($line =~ /(\d+)\s*$/) ? $1 : 0;

        $utc = $data_version_time + $tzone_offest * 3600;

        # gmtime() returns a 0-based month and the year minus 1900
        ($ss, $nn, $hh, $dd, $mm, $yyyy) = gmtime($utc);
        $mm   += 1;
        $yyyy += 1900;

        # yy/mm/dd hh:mm:ss
        $timestamp = sprintf "%02d/%02d/%02d %02d:%02d:%02d",
                             $yyyy % 100, $mm, $dd, $hh, $nn, $ss;

        $file->{timestamp} = $timestamp;
      }
    }
  }
  elsif ($state == GROUP)
  {
    if ($line =~ $END_GROUP)
    {
      pop @states;
    }
    elsif ($line =~ $BEGIN_DATA)
    {
      push @states, DATA;
    }
    elsif ($line =~ $KEYVAL)
    {
      $group->{$1} = $2;
    }
  }
  elsif ($state == DATA)
  {
    if ($line =~ $END_DATA)
    {
      pop @states;
      next;
    }

    # a blank line still carries its newline, so split() would turn it
    # into a bogus one-field row - skip it here instead
    next unless $line =~ /\S/;

    my @fields = split (/,/, $line);
    $group->push_row (\@fields) if @fields;
  }
}


################################################################################
################################################################################
# dump REAMS of information if verbose is on
################################################################################
################################################################################
# verbose mode only: ask every parsed file object to dump itself
if ($verbose)
{
  for my $parsed_file (@files)
  {
    $parsed_file->dump;
  }
}



################################################################################
################################################################################
# GENERATE CSV to $out
################################################################################
################################################################################

# Only the most recently parsed file is written, using the hour of the last
# DATA_VERSION seen.  Fail with a clear message when the input never
# contained a BEGIN_FILE instead of calling a method on undef.
die "NO BEGIN_FILE FOUND IN INPUT!\n" unless $file;

$file->csv ($hour, \%known_group_types, $out);

# buffered write errors (disk full, ...) only show up when closing
close ($out) or die "CANNOT CLOSE OUTPUT: $!\n";


################################################################################
################################################################################
################################################################################
################################################################################
# both classes below COULD be in another file
# but they are included here for simplicity

package Ozone::File;
{
  #use warnings;
  #use strict;
  #use vars '$AUTOLOAD';

  # class vars
  my $c_fid;

  # Constructor: returns a new, empty file object blessed into the class it
  # was invoked on.
  sub new
  {
    my $class = $_[0];

    # header fields as printed by dump(); they start out empty
    my %self = map { $_ => '' }
      qw(FORMAT_VERSION AGENCY FILENAME DATA_VERSION TZONE QC_LEVEL);

    $self{groups}    = [];        # group objects, in push_group() order
    $self{fid}       = $c_fid++;  # running file id from the class counter
    $self{gid}       = 0;         # id the next pushed group will receive
    $self{stations}  = {};        # station,code key => groups (read by csv())
    $self{codes}     = [];
    $self{timestamp} = 0;         # second field of every record csv() writes

    return bless \%self, $class;
  }
  # Attach $group to this file: it receives the next per-file group id, is
  # appended to the file's group list and gets a back reference to the file.
  sub push_group
  {
    my $self  = shift;
    my $group = shift;

    $group->{gid} = $self->{gid}++;
    push @{ $self->{groups} }, $group;

    # last statement on purpose: its value (the file) is what the sub returns
    $group->{file} = $self;
  }
  # Print this file's id and header fields to the currently selected output
  # handle, then let every group of the file dump itself.
  sub dump
  {
    my $self = shift;

    print "\nFILE $self->{fid}\n";
    for my $field (qw(FORMAT_VERSION AGENCY FILENAME DATA_VERSION TZONE QC_LEVEL))
    {
      # labels are padded to the width of the longest one (14 characters)
      printf "\t%-14s : %s\n", $field, $self->{$field};
    }

    # the groups live in the 'groups' slot - there is no groups() accessor
    foreach my $group (@{ $self->{groups} })
    {
      $group->dump;
    }
  }

  # Write one csv record per station,code key known to this file.
  #
  # Parameters:
  #   $hour                  - hour used to pick the column of 24-step groups
  #   $requested_group_types - hash ref, numeric order key => group type
  #                            string; entries valued 'INVALID' are skipped
  #   $out                   - file handle the records are printed to
  #
  # Every record is "<station,code key>,<timestamp>" followed by one cell per
  # matching table row of each requested group type, taken in NUMERIC order
  # of the type keys.
  #
  # Dies when a station has no group of a requested type, when such a group
  # has no rows for the station, or when NUMSTEPS is neither 24 nor 1.
  sub csv
  {
    my($self, $hour, $requested_group_types, $out) = @_;

    # yo.  all the stations we know about
    my $f_stations = $self->{stations};

    # numeric order of the keys (a plain sort would put 10 before 2);
    # string comparison only breaks ties between non-numeric keys
    my @r_group_type_keys =
      sort { $a <=> $b or $a cmp $b } keys %{$requested_group_types};

    my $f_csv = '';

    # write ONE record for every station this ENTIRE file knows
    # about. this MANDATES that each group in the file ALSO has a record
    # for this station!
    for my $f_station (sort keys %{$f_stations})
    {
      print "\nHANDLING STATION/CODE KEY <$f_station>\n" if $verbose;
      $f_csv .=  "$f_station" . "," . $self->{timestamp};

      # ALL the groups associated with this particular station
      my @s_groups = @{$f_stations->{$f_station}};

      for my $r_group_type_k (@r_group_type_keys)
      {
        # get the string identifying THIS group type
        my $r_group_type = $requested_group_types->{$r_group_type_k};

        print "\n\tHANDLING GROUP TYPE <$r_group_type_k> => <$r_group_type>\n" if $verbose;

        # ignore the invalid group...
        next if $r_group_type eq 'INVALID';

        # first group of the station that is of this type
        my $group;
        foreach my $s_group (@s_groups)
        {
          next unless $r_group_type eq
            ($s_group->{type} or $s_group->type());
          $group = $s_group;
          last;
        }

        # bummer, i don't have this group type
        die "NO GROUP (<$r_group_type_k> => <$r_group_type>)\n",
            "ENTRY FOUND FOR <$f_station>!\n"
          unless $group;

        # bummer, this group doesn't have that station!
        die "NO ENTRY FOUND FOR <$f_station>!\n",
            "IN GROUP GID (",$group->{gid},") OF FILE (",$self->{fid},")\n"
          unless exists $group->{stations}->{$f_station};

        my $g_table = $group->{table};
        my $g_s_indices = $group->{stations}->{$f_station};

        if ($verbose)
        {
          print "\t\t<$f_station> FOUND IN ROWS [",
            join (',', @{$g_s_indices}),
            "] OF GROUP <",
            $group->{gid},
            "> OF FILE <",
            $self->{fid},
            ">\n";
          my $sample_row = join ',', @{$g_table->[0]};
          $sample_row =~ s/\s//g;
          print "\t\tSAMPLE ROW (TABLE[0])\n",
            ('#'x79),"\n",
            $sample_row,"\n",
            ('#'x79),"\n";
        }

        # make a good guess as to the index into
        # the table based on numsteps :
        # we really only handle 24 hour samples (24 cols)
        # and peak groups (1 col)
        my $g_numsteps = $group->{NUMSTEPS};
        my $j;

        if ($g_numsteps == 24)
        {
          $j = $hour + 2;	#skip f_station and code cols
        }
        elsif ($g_numsteps == 1)
        {
          $j = -1;  # the last (-1) column
        }
        else
        {
          die "COULD NOT DETERMINE INDEX INTO TABLE BASED ON HOUR ($hour)\n",
              "IN GROUP GID (",$group->{gid},") OF FILE (",$self->{fid},")\n";
        }

        # o.k., alls well - append the cells
        foreach my $i (@{$g_s_indices})
        {
          print "\t\tINDEXING GROUP TABLE WITH [$i, $j]\n" if $verbose;
          my $cell = $g_table->[$i][$j];
          $cell = '' unless defined $cell;  # short row: emit an empty cell
          chomp $cell;
          $cell =~ s/\x0d//;                # drop the CR of a CRLF line end
          print "\t\tCELL <$cell>\n" if $verbose;
          $f_csv .= ",$cell";
        }
      }

      $f_csv .= "\n";

      if ($verbose)
      {
        print "CSV\n",
              ('#'x79),"\n",
              $f_csv,
              ('#'x79),"\n";
      }
    }
    print {$out} "\n\n\n\n" if $verbose;

    # print the entire buf in one big hit to disk
    print {$out} $f_csv;
  }

  1;
} # end package Ozone::File #



package Ozone::Group;
{
  #use warnings;
  #use strict;

  # Constructor: returns a new, empty group object blessed into the class it
  # was invoked on.
  sub new
  {
    my $class = $_[0];

    # header fields as printed by dump(); they start out empty
    my %self = map { $_ => '' } qw(
      VARIABLE DATA_TYPE MEASUREMENT_TYPE CHARACTERISTIC
      START_DTG END_DTG INTERVAL START_REF
      NUMSTEPS AVG_TIME UNITS STATIONS
    );

    $self{table}       = [];     # data rows, filled by push_row()
    $self{n_row}       = 0;      # number of rows stored so far
    $self{gid}         = undef;
    $self{stations}    = {};     # station,code key => row indices in table
    $self{type_key}    = undef;
    $self{type}        = undef;  # cache filled by type()
    $self{file}        = undef;
    $self{row_per_key} = 1;      # largest number of rows seen for one key
    $self{has_qc}      = 0;      # set once a key has more than one row

    return bless \%self, $class;
  }

# change this to simply append to current columns iff new row
# with same key is found!!!!!!!!!
  # Store one data row (array ref of fields) as the next row of this
  # group's table and index it under its "station,code" key, which is
  # built from the first two fields of the row.
  sub push_row
  {
    my ($self, $row) = @_;

    my $row_index = $self->{n_row};
    $self->{table}->[$row_index] = $row;

    my $key = join ',', $row->[0], $row->[1];

    # group level index: station,code key -->> indices of its rows, so the
    # group knows all the rows that belong to a given key
    my $rows_of = $self->{stations};
    $rows_of->{$key} = [] unless exists $rows_of->{$key};
    push @{ $rows_of->{$key} }, $row_index;

    # more than one row for a key is taken to mean duplicate (qc) rows,
    # and then EVERY key of the group is expected to have them
    my $rows_for_key = scalar @{ $rows_of->{$key} };
    if ($rows_for_key > $self->{row_per_key})
    {
      $self->{row_per_key} = $rows_for_key;
    }
    $self->{has_qc} = 1 if $self->{row_per_key} > 1;

    # file level index: the owning file learns that this group holds rows
    # for the key (one entry per row); WHICH rows only the group knows
    my $owner     = $self->{file};
    my $groups_of = $owner->{stations};
    $groups_of->{$key} = [] unless exists $groups_of->{$key};
    push @{ $groups_of->{$key} }, $self;

    $self->{n_row}++;
  }
  # Return the ':' delimited type string of this group.  It is built from
  # the fields listed below (START_DTG, END_DTG and STATIONS are left out)
  # on the first call and served from the 'type' slot afterwards.
  sub type
  {
    my $self = shift;

    return $self->{type} if defined $self->{type};

    my @type_fields = qw(
      VARIABLE DATA_TYPE MEASUREMENT_TYPE CHARACTERISTIC
      INTERVAL START_REF NUMSTEPS AVG_TIME UNITS
    );

    # cache type for later use
    return $self->{type} = join ':', @{$self}{@type_fields};
  }
  # Print everything this group knows to the currently selected output
  # handle: header fields, bookkeeping counters, the type string, the raw
  # table rows and, per station,code key, the indices of its rows.
  sub dump
  {
    my $self = shift;

    print "\n\tGROUP $self->{gid}\n";
    foreach my $field (qw(
      VARIABLE DATA_TYPE MEASUREMENT_TYPE CHARACTERISTIC
      START_DTG END_DTG INTERVAL START_REF
      NUMSTEPS AVG_TIME UNITS STATIONS
    ))
    {
      # labels are padded to the width of the longest one (16 characters)
      printf "\t\t%-16s : %s\n", $field, $self->{$field};
    }

    print "\n";
    foreach my $field (qw(n_row row_per_key has_qc))
    {
      printf "\t\t%-16s : %s\n", $field, $self->{$field};
    }
    printf "\t\t%-16s : %s\n", 'type', scalar $self->type;

    # rows are printed as stored; no newline is added after a row
    print "\n\n\tTABLE\n";
    print ("\t", ('#' x 79), "\n");
    foreach my $row (@{ $self->{table} })
    {
      print "\t", join ',', @{$row};
    }
    print ("\t", ('#' x 79), "\n");

    # table and stations are plain slots of the object - there are no
    # table(), stations() or station_indices() accessors
    print "\n";
    my $stations = $self->{stations};
    foreach my $station (sort keys %{$stations})
    {
      print ("\tSTATION $station -> [", (join ',', @{ $stations->{$station} }), "]\n");
    }
  }

  1;
} # end package Ozone::Group #