#!/usr/bin/env perl

################################################################################
################################################################################
# ozone.pl - turn ozone.obs files in oms format 2 into csv for awips
#
# assumptions made by this decoder
#
#  * iff a station appears in a group, it appears in every group
#  * iff a group has qc for a station, it will have qc for every station
################################################################################
################################################################################

################################################################################
# strictures - warnings only ever go to STDERR, never into the csv.
# diagnostics is left off; enable it while debugging if you need it.
################################################################################
use strict;
use warnings;
#use diagnostics;

use Time::Local;

################################################################################
# a list of all known group types
# you may add new entries by composing a ':' delimited string from all the
# UNIQUE fields of an ozone GROUP, namely
#
#   VARIABLE          ->  UNIQUE
#   DATA_TYPE         ->  UNIQUE
#   MEASUREMENT_TYPE  ->  UNIQUE
#   CHARACTERISTIC    ->  UNIQUE
#   START_DTG         ->  NOT UNIQUE
#   END_DTG           ->  NOT UNIQUE
#   INTERVAL          ->  UNIQUE
#   START_REF         ->  UNIQUE
#   NUMSTEPS          ->  UNIQUE
#   AVG_TIME          ->  UNIQUE
#   UNITS             ->  UNIQUE
#   STATIONS          ->  NOT UNIQUE
#
# NOTE : START_DTG, END_DTG, and STATIONS **ARE NOT UNIQUE**
#        so do not include them in the entry!!
#
# entry = VARIABLE:DATA_TYPE:MEASUREMENT_TYPE:CHARACTERISTIC:INTERVAL
#         :START_REF:NUMSTEPS:AVG_TIME:UNITS
#
# (this is exactly the string Ozone::Group::type builds for a parsed group)
#
# NOTE that the keys of this hash also determine the ORDER
# in which the groups will be written!!
################################################################################
my %known_group_types = (
    0 => 'INVALID',
    1 => 'OZONE:POINT:SAMPLE:OBSERVED:60:0:24:60:PPB',
    2 => 'OZONE:POINT:SAMPLE:OBSERVED:60:0:24:480:PPB',
    3 => 'OZONE:POINT:SAMPLE:DERIVED:60:-240:24:480:PPB',
    4 => 'OZONE:POINT:PEAK:OBSERVED:1440:0:1:60:PPB',
    5 => 'OZONE:POINT:PEAK:OBSERVED:1440:0:1:480:PPB',
    6 => 'OZONE-AQI:POINT:SAMPLE:DERIVED:60:-240:24:480:AQI',
);

################################################################################
################################################################################
# DECLARATIONS AND CONSTANTS
################################################################################
################################################################################

# bunch -o- variables
# (file-scoped on purpose: the code and the packages further down this file
#  read $verbose, $hour, $file, @files and $out)
my (
    $hour,
    $timestamp,
    $data_version_time,
    $utc,
    $tzone_offest,
    $yyyy, $mm, $dd, $hh, $nn, $ss,
    $line,
    $state,
    @states,
    $file,
    @files,
    $group,
    $n_stations,
    $key,
    $val,
    $COMMENT,
    $BEGIN_FILE,
    $BEGIN_GROUP,
    $BEGIN_DATA,
    $END_FILE,
    $END_GROUP,
    $END_DATA,
    $KEYVAL,
    $DATA_VERSION,
    $foo,
    $output_filename,
    $out,
    $verbose,
);

$verbose = 0;    # set this to 1 for REAMS of output!!
$hour    = 0;    # hour-of-day taken from DATA_VERSION; selects the csv column

# states the parser can be in
use constant INITIAL => 0;
use constant FILE    => 1;
use constant GROUP   => 2;
use constant DATA    => 3;

# patterns used by parsers
$COMMENT      = qr/^\s*!/;
$KEYVAL       = qr/^\s*([^,\s](?:[^,]*[^,\s])*),\s*([^,\s](?:.*[^,\s])*)\s*$/;
$DATA_VERSION = qr/^\s*DATA_VERSION\s*(\d)+\s*$/;
$BEGIN_FILE   = qr/BEGIN_FILE/;
$BEGIN_GROUP  = qr/BEGIN_GROUP/;
$BEGIN_DATA   = qr/BEGIN_DATA/;
$END_FILE     = qr/END_FILE/;
$END_GROUP    = qr/END_GROUP/;
$END_DATA     = qr/END_DATA/;

# global list of file objects
@files = ();

# stack of states, begin in the INITIAL state
@states = (INITIAL);

################################################################################
# let's try to guess the output filename
################################################################################
$output_filename = $ARGV[0] or die "NO INPUT FILE!\n";

# try to build an output name based on input name
# this is awips specific code - not needed
#   .../tmp/<name>      ->  .../Raw/<name>
#   .../<name>.<digits> ->  .../sonoma.<digits>
$output_filename =~ s/tmp\/([^\/]+)$/Raw\/$1/;
$output_filename =~ s/\/[^\/]+\.(\d+)$/\/sonoma.$1/;

# forget it if we failed!!!! (neither rewrite changed the name, and we must
# never open the input file itself for writing)
if ($output_filename eq $ARGV[0]) {
    undef $output_filename;
}

if ($output_filename) {
    # three-argument open: the file name can never be read as a mode
    open($out, '>', $output_filename)
        or die "CANNOT OPEN OUTPUT FILE $output_filename: $!\n";
}
else {
    # oh well, just use stdout
    $out = \*STDOUT;
}

################################################################################
# PARSE MADNESS!!!!
################################################################################

# parse all files named on command line
# the parser is a small state machine: the top of @states is the section we
# are currently inside (INITIAL -> FILE -> GROUP -> DATA); BEGIN_* pushes a
# state and END_* pops it.  lines that fit nothing are silently ignored.
while ($line = <>) {
    next if $line =~ $COMMENT;

    $state = $states[-1];

    if ($state == INITIAL) {
        if ($line =~ $BEGIN_FILE) {
            $file = Ozone::File->new();
            push @files,  $file;
            push @states, FILE;
        }
    }
    elsif ($state == FILE) {
        if ($line =~ $END_FILE) {
            pop @states;
        }
        elsif ($line =~ $BEGIN_GROUP) {
            # remember the group right away, so that DATA rows land in it
            # even when the group carries no key/value header lines at all
            $group = Ozone::Group->new();
            $file->push_group($group);
            push @states, GROUP;
        }
        elsif ($line =~ $KEYVAL) {
            $file->{$1} = $2;

            if (index($line, 'DATA_VERSION') != -1) {
                # the trailing digit run is read as YYYYMMDDHHNN (any
                # further digits are ignored)
                $line =~ /(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})\d*\s*$/
                    or die "MALFORMED DATA_VERSION LINE: $line";
                ($yyyy, $mm, $dd, $hh, $nn) = ($1, $2, $3, $4, $5);

                # timegm wants a 0-based month (0..11)
                $data_version_time =
                    Time::Local::timegm(0, $nn, $hh, $dd, $mm - 1, $yyyy);
                $hour = $hh;
            }
            elsif (index($line, 'TZONE') != -1) {
                # trailing digits are an offset in hours that is ADDED to
                # the DATA_VERSION time; without trailing digits the
                # offset is 0
                $tzone_offest = ($line =~ /(\d+)\s*$/) ? $1 : 0;

                if (defined $data_version_time) {
                    $utc = $data_version_time + $tzone_offest * 3600;

                    # gmtime hands back year-1900 and a 0-based month
                    my ($sec, $min, $hr, $mday, $mon, $year) = gmtime($utc);
                    $timestamp = sprintf "%02d/%02d/%02d %02d:%02d:%02d",
                        $year % 100, $mon + 1, $mday, $hr, $min, $sec;
                    $file->{timestamp} = $timestamp;
                }
                else {
                    warn "TZONE SEEN BEFORE DATA_VERSION - NO TIMESTAMP SET\n";
                }
            }
        }
    }
    elsif ($state == GROUP) {
        if ($line =~ $END_GROUP) {
            pop @states;
        }
        elsif ($line =~ $BEGIN_DATA) {
            push @states, DATA;
        }
        elsif ($line =~ $KEYVAL) {
            $group = $file->{groups}->[-1];
            $group->{$1} = $2;
        }
    }
    elsif ($state == DATA) {
        if ($line =~ $END_DATA) {
            pop @states;
        }
        elsif ($line =~ /\S/) {
            # one comma separated table row; blank lines are not rows
            my @fields = split(/,/, $line);
            $group->push_row(\@fields) if @fields;
        }
    }
}
################################################################################
################################################################################
# dump REAMS of information if verbose is on
################################################################################
################################################################################
if ($verbose) {
    foreach my $parsed_file (@files) {
        $parsed_file->dump;
    }
}

################################################################################
################################################################################
# GENERATE CSV to $out
################################################################################
################################################################################

# only the LAST file that was parsed is written
die "NO BEGIN_FILE SECTION FOUND IN INPUT - NOTHING TO WRITE!\n"
    if not defined $file;

$file->csv($hour, \%known_group_types, $out);

# buffered write errors only show up when the handle is closed
close($out) or die "CANNOT CLOSE OUTPUT: $!\n";

################################################################################
################################################################################
################################################################################
################################################################################

# both classes below COULD be in another file
# but they are included here for simplicity

# Ozone::File - one BEGIN_FILE .. END_FILE section of the input.
#
# holds the header key/values, the list of groups, and a hash
#   stations : "station,code" key -> [ every group holding a row for that key ]
# (a group is listed once per row it holds for the key)
package Ozone::File;
{
    #use warnings;
    #use strict;

    # class vars
    # running file id.  left undefined on purpose: this statement only runs
    # after the parse loop above, and an undef post-increment yields 0.
    my $c_fid;

    # constructor: Ozone::File->new() - returns an empty file object
    sub new {
        my ($class) = @_;

        return bless {
            FORMAT_VERSION => '',
            AGENCY         => '',
            FILENAME       => '',
            DATA_VERSION   => '',
            TZONE          => '',
            QC_LEVEL       => '',
            groups         => [],
            fid            => $c_fid++,
            gid            => 0,
            stations       => {},
            codes          => [],
            timestamp      => 0,
        }, $class;
    }

    # append $group to this file, number it (gid) and point it back at us
    sub push_group {
        my ($self, $group) = @_;

        push @{ $self->{groups} }, $group;
        $group->{gid}  = $self->{gid}++;
        $group->{file} = $self;
    }

    # print the header fields of this file and dump every group (to STDOUT)
    sub dump {
        my $self = shift;

        print "\nFILE $self->{fid}\n";
        print "\tFORMAT_VERSION : ", $self->{FORMAT_VERSION}, "\n";
        print "\tAGENCY : ",         $self->{AGENCY},         "\n";
        print "\tFILENAME : ",       $self->{FILENAME},       "\n";
        print "\tDATA_VERSION : ",   $self->{DATA_VERSION},   "\n";
        print "\tTZONE : ",          $self->{TZONE},          "\n";
        print "\tQC_LEVEL : ",       $self->{QC_LEVEL},       "\n";

        # there is no groups() accessor - walk the array directly
        foreach my $group (@{ $self->{groups} }) {
            $group->dump;
        }
    }

    # write csv station records for all requested group types
    #
    #   $hour                  : hour of day; picks the column of 24 step groups
    #   $requested_group_types : hash ref, numeric key -> group type string;
    #                            written in NUMERIC key order, 'INVALID' skipped
    #   $out                   : file handle the csv is printed to
    #
    # one line per "station,code" key: the key, the file timestamp, then one
    # cell per table row of every requested group type.
    # DIES if a requested group type is missing for a station, if the group
    # has no rows for the station, or if NUMSTEPS is neither 24 nor 1.
    sub csv {
        my ($self, $hour, $requested_group_types, $out) = @_;

        # yo.  all the stations we know about
        my $f_stations = $self->{stations};
        my $f_csv      = '';

        # numeric order, so that key 10 sorts after key 9 (not after key 1)
        my @r_group_type_keys =
            sort { $a <=> $b } keys %{$requested_group_types};

        # write ONE record for every station this ENTIRE file knows
        # about.  this MANDATES that each group in the file ALSO has a record
        # for this station!
        for my $f_station (sort keys %{$f_stations}) {
            print "\nHANDLING STATION/CODE KEY <$f_station>\n" if $verbose;

            $f_csv .= "$f_station" . "," . $self->{timestamp};

            # ALL the groups associated with this particular station
            my @s_groups = @{ $f_stations->{$f_station} };

            for my $r_group_type_k (@r_group_type_keys) {
                # get the string identifying THIS group type
                my $r_group_type = $requested_group_types->{$r_group_type_k};

                print "\n\tHANDLING GROUP TYPE <$r_group_type_k> => <$r_group_type>\n"
                    if $verbose;

                # ignore the invalid group...
                next if $r_group_type eq 'INVALID';

                # see if i seem to have a group of this type...
                my $group;
                foreach my $s_group (@s_groups) {
                    if ($r_group_type eq ($s_group->{type} or $s_group->type())) {
                        $group = $s_group;
                        last;
                    }
                }

                # bummer, i don't have this group type
                if (not $group) {
                    die "NO GROUP (<$r_group_type_k> => <$r_group_type>)\n",
                        "ENTRY FOUND FOR <$f_station>!\n";
                }

                # bummer, this group doesn't have that station!
                if (not exists $group->{stations}->{$f_station}) {
                    die "NO ENTRY FOUND FOR <$f_station>!\n",
                        "IN GROUP GID (", $group->{gid}, ") OF FILE (", $self->{fid}, ")\n";
                }

                my $g_table     = $group->{table};
                my $g_s_indices = $group->{stations}->{$f_station};

                if ($verbose) {
                    print "\t\t<$f_station> FOUND IN ROWS [",
                        join(',', @{$g_s_indices}),
                        "] OF GROUP <", $group->{gid},
                        "> OF FILE <", $self->{fid}, ">\n";

                    my $sample_row = join ',', @{ $g_table->[0] };
                    $sample_row =~ s/\s//g;
                    print "\t\tSAMPLE ROW (TABLE[0])\n",
                        ('#' x 79), "\n",
                        $sample_row, "\n",
                        ('#' x 79), "\n";
                }

                # make a good guess as to the index into
                # the table based on numsteps :
                # we really only handle 24 hour samples (24 cols)
                # and peak groups (1 col)
                my $g_numsteps = $group->{NUMSTEPS};
                my $j;
                if ($g_numsteps == 24) {
                    $j = $hour + 2;    # skip f_station and code cols
                }
                elsif ($g_numsteps == 1) {
                    $j = -1;           # the last (-1) column
                }
                else {
                    die "COULD NOT DETERMINE INDEX INTO TABLE BASED ON HOUR ($hour)\n",
                        "IN GROUP GID (", $group->{gid}, ") OF FILE (", $self->{fid}, ")\n";
                }

                # o.k., alls well - append the cells
                foreach my $i (@{$g_s_indices}) {
                    print "\t\tINDEXING GROUP TABLE WITH [$i, $j]\n" if $verbose;

                    # a row shorter than expected yields an empty cell
                    my $cell = $g_table->[$i][$j];
                    $cell = '' if not defined $cell;

                    # the last column still carries the line ending
                    chomp $cell;
                    $cell =~ s/\x0d//;

                    print "\t\tCELL <$cell>\n" if $verbose;
                    $f_csv .= ",$cell";
                }
            }

            $f_csv .= "\n";

            if ($verbose) {
                print "CSV\n",
                    ('#' x 79), "\n",
                    $f_csv,
                    ('#' x 79), "\n";
            }
        }

        print $out "\n\n\n\n" if $verbose;

        # print the entire buf in one big hit to disk
        print $out $f_csv;
    }

    1;
}
# end package Ozone::File
#

# Ozone::Group - one BEGIN_GROUP .. END_GROUP section of a file.
#
# holds the header key/values, the data table (array of rows, each row an
# array ref of the comma separated fields) and a hash
#   stations : "station,code" key -> [ indices of its rows in the table ]
package Ozone::Group;
{
    #use warnings;
    #use strict;

    # constructor: Ozone::Group->new() - returns an empty group object
    sub new {
        my ($class) = @_;

        return bless {
            VARIABLE         => '',
            DATA_TYPE        => '',
            MEASUREMENT_TYPE => '',
            CHARACTERISTIC   => '',
            START_DTG        => '',
            END_DTG          => '',
            INTERVAL         => '',
            START_REF        => '',
            NUMSTEPS         => '',
            AVG_TIME         => '',
            UNITS            => '',
            STATIONS         => '',
            table            => [],
            n_row            => 0,
            gid              => undef,
            stations         => {},
            type_key         => undef,
            type             => undef,
            file             => undef,
            row_per_key      => 1,
            has_qc           => 0,
        }, $class;
    }

    # store one table row (array ref of fields) and index it under its
    # "station,code" key, both in this group and in the parent file
    #
    # change this to simply append to current columns iff new row
    # with same key is found!!!!!!!!!
    sub push_row {
        my ($self, $row) = @_;

        my $n_row = $self->{n_row};
        $self->{table}->[$n_row] = $row;

        # the first two columns identify the row
        my $station = defined $row->[0] ? $row->[0] : '';
        my $code    = defined $row->[1] ? $row->[1] : '';
        my $key     = "$station,$code";

        # hash station,code -->> index in table
        # e.g. i know all the rows associated with any given
        # station,code key
        my $stations = $self->{stations};
        $stations->{$key} = [] if not exists $stations->{$key};
        push @{ $stations->{$key} }, $n_row;

        # check if this group appears to have duplicate (qc) rows
        # because if it does EVERY row will then be expected to!
        my $r_p_key = scalar @{ $stations->{$key} };
        $self->{row_per_key} = $r_p_key if $r_p_key > $self->{row_per_key};

        # guess that this group has qc information
        $self->{has_qc} = 1 if $self->{row_per_key} > 1;

        # let my parent know that i contain this station,code key
        # e.g. he will know that i contain rows for a given
        # station,code key, only i know WHICH rows, however
        # (a group that was never pushed onto a file has no parent to tell)
        my $file = $self->{file};
        if (defined $file) {
            my $fstations = $file->{stations};
            $fstations->{$key} = [] if not exists $fstations->{$key};
            push @{ $fstations->{$key} }, $self;
        }

        $self->{n_row}++;
    }

    # return the ':' delimited type string of this group, built from its
    # UNIQUE header fields (START_DTG, END_DTG and STATIONS are left out)
    sub type {
        my $self = shift;

        # cache type for later use if it's not already known!
        if (not defined $self->{type}) {
            $self->{type} = join ':', (
                $self->{VARIABLE},
                $self->{DATA_TYPE},
                $self->{MEASUREMENT_TYPE},
                $self->{CHARACTERISTIC},
                #$self->{START_DTG},
                #$self->{END_DTG},
                $self->{INTERVAL},
                $self->{START_REF},
                $self->{NUMSTEPS},
                $self->{AVG_TIME},
                $self->{UNITS},
                #$self->{STATIONS},
            );
        }

        return $self->{type};
    }

    # print header fields, bookkeeping, the raw table and the
    # station -> row index map of this group (to STDOUT)
    sub dump {
        my $self = shift;

        print "\n\tGROUP $self->{gid}\n";
        print "\t\tVARIABLE : ",         $self->{VARIABLE},         "\n";
        print "\t\tDATA_TYPE : ",        $self->{DATA_TYPE},        "\n";
        print "\t\tMEASUREMENT_TYPE : ", $self->{MEASUREMENT_TYPE}, "\n";
        print "\t\tCHARACTERISTIC : ",   $self->{CHARACTERISTIC},   "\n";
        print "\t\tSTART_DTG : ",        $self->{START_DTG},        "\n";
        print "\t\tEND_DTG : ",          $self->{END_DTG},          "\n";
        print "\t\tINTERVAL : ",         $self->{INTERVAL},         "\n";
        print "\t\tSTART_REF : ",        $self->{START_REF},        "\n";
        print "\t\tNUMSTEPS : ",         $self->{NUMSTEPS},         "\n";
        print "\t\tAVG_TIME : ",         $self->{AVG_TIME},         "\n";
        print "\t\tUNITS : ",            $self->{UNITS},            "\n";
        print "\t\tSTATIONS : ",         $self->{STATIONS},         "\n";
        print "\n";
        print "\t\tn_row : ",       $self->{n_row},       "\n";
        print "\t\trow_per_key : ", $self->{row_per_key}, "\n";
        print "\t\thas_qc : ",      $self->{has_qc},      "\n";
        print "\t\ttype : ",        $self->type,          "\n";

        print "\n\n\tTABLE\n";
        print("\t", ('#' x 79), "\n");

        # there are no table()/stations()/station_indices() accessors -
        # read the members directly
        foreach my $row (@{ $self->{table} }) {
            print "\t", join ',', @{$row};
        }

        print("\t", ('#' x 79), "\n");
        print "\n";

        my $stations = $self->{stations};
        foreach my $station (sort keys %{$stations}) {
            my @sindices = @{ $stations->{$station} };
            print("\tSTATION $station -> [", (join ',', @sindices), "]\n");
        }
    }

    1;
}
# end package Ozone::Group
#