#! /usr/bin/perl

#
# ----------------------------------------------------------------------------
# "THE CAPPUCHINO-WARE LICENSE"
# Rainer Wichmann <rwichmann@la-samhna.de> wrote this file. As long as you 
# retain this notice you can do whatever you want with this stuff. If we 
# meet some day, and you think this stuff is worth it, you can buy me a 
# cappuchino in return. Rainer Wichmann
# ----------------------------------------------------------------------------
#
# Script to convert a spectrum in JCAMP-DX format into a simple
# text file with rows in 'x y' format (suitable e.g. for gnuplot,
# IDL, IRAF, or just anything else you like).
#
# JCAMP-DX is a format that IMHO is the result of re-inventing the wheel,
# and badly. The FITS format widely used e.g. in astronomy is almost
# 10 years older, has extensive (and **free**) software support,
# and can do everything that JCAMP-DX does, at least w.r.t. spectra.
#
# This script can handle the AFFN, PAC, SQZ, DIF, and DIFDUP form
# of JCAMP-DX TABULAR DATA in the (X++(Y..)) variable list format.

use strict;
use warnings;
use Getopt::Std;

my %opts;
my $USAGE = "Converts an input file in JCAMP-DX spectral format into a list of x y pairs\n\nUsage: jcamp_conv.pl [-dhq] input.file\n\t-d for debug output\n\t-h for help\n\t-q for NOT writing header info\n\nTo redirect standard output, use:\n\tjcamp_conv.pl input.file > output.file";
my ($infile) = '';
my $lnum = 0;

sub readBlock;

getopts('hdq', \%opts);
if (defined $opts{'h'}) {
    print "$USAGE\n";
    exit 0;
}

# process file
#
if (!defined($ARGV[0])) {
    print "$USAGE\n";
    die "ERROR: No input file given on command line !";
}

$infile = $ARGV[0];
if (defined $opts{'d'}) {
    print "Input file: $infile\n";
}

open FILE, "< $infile" or die "Cannot open $infile: $!";

while (0 == readBlock) {}; # loop over BLOCKs in file.


#
# Subroutines
#
sub dup_conv($) {
    my ($dup_line) = @_;

    $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)S/$1/g;
    $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)S/$1/g;
    $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)S/$1/g;
    $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)S/$1/g;

    $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)T/$1$1/g;
    $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)T/$1$1/g;
    $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)T/$1$1/g;
    $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)T/$1$1/g;

    $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)U/$1$1$1/g;
    $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)U/$1$1$1/g;
    $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)U/$1$1$1/g;
    $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)U/$1$1$1/g;

    $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)V/$1$1$1$1/g;
    $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)V/$1$1$1$1/g;
    $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)V/$1$1$1$1/g;
    $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)V/$1$1$1$1/g;

    $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)W/$1$1$1$1$1/g;
    $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)W/$1$1$1$1$1/g;
    $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)W/$1$1$1$1$1/g;
    $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)W/$1$1$1$1$1/g;

    $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)X/$1$1$1$1$1$1/g;
    $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)X/$1$1$1$1$1$1/g;
    $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)X/$1$1$1$1$1$1/g;
    $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)X/$1$1$1$1$1$1/g;

    $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)Y/$1$1$1$1$1$1$1/g;
    $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)Y/$1$1$1$1$1$1$1/g;
    $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)Y/$1$1$1$1$1$1$1/g;
    $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)Y/$1$1$1$1$1$1$1/g;

    $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)Z/$1$1$1$1$1$1$1$1/g;
    $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)Z/$1$1$1$1$1$1$1$1/g;
    $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)Z/$1$1$1$1$1$1$1$1/g;
    $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)Z/$1$1$1$1$1$1$1$1/g;

    $dup_line =~ s/(\%|J\d*|K\d*|L\d*|M\d*|N\d*|O\d*|P\d*|Q\d*|R\d*)s/$1$1$1$1$1$1$1$1$1/g;
    $dup_line =~ s/(j\d*|k\d*|l\d*|m\d*|n\d*|o\d*|p\d*|q\d*|r\d*)s/$1$1$1$1$1$1$1$1$1/g;
    $dup_line =~ s/(\@|A\d*|B\d*|C\d*|D\d*|E\d*|F\d*|G\d*|H\d*|I\d*)s/$1$1$1$1$1$1$1$1$1/g;
    $dup_line =~ s/(a\d*|b\d*|c\d*|d\d*|e\d*|f\d*|g\d*|h\d*|i\d*)s/$1$1$1$1$1$1$1$1$1/g;


    return $dup_line;
}

sub dif_conv($) {
    my ($dif_line) = @_;
    $dif_line =~ s/\%/ 0/g;
    $dif_line =~ s/J/ 1/g;
    $dif_line =~ s/K/ 2/g;
    $dif_line =~ s/L/ 3/g;
    $dif_line =~ s/M/ 4/g;
    $dif_line =~ s/N/ 5/g;
    $dif_line =~ s/O/ 6/g;
    $dif_line =~ s/P/ 7/g;
    $dif_line =~ s/Q/ 8/g;
    $dif_line =~ s/R/ 9/g;
    $dif_line =~ s/j/ -1/g;
    $dif_line =~ s/k/ -2/g;
    $dif_line =~ s/l/ -3/g;
    $dif_line =~ s/m/ -4/g;
    $dif_line =~ s/n/ -5/g;
    $dif_line =~ s/o/ -6/g;
    $dif_line =~ s/p/ -7/g;
    $dif_line =~ s/q/ -8/g;
    $dif_line =~ s/r/ -9/g;
    return $dif_line;
}

sub sqz_conv($) {
    my ($sqz_line) = @_;
    $sqz_line =~ s/\@/ 0/g;
    $sqz_line =~ s/A/ 1/g;
    $sqz_line =~ s/B/ 2/g;
    $sqz_line =~ s/C/ 3/g;
    $sqz_line =~ s/D/ 4/g;
    $sqz_line =~ s/E/ 5/g;
    $sqz_line =~ s/F/ 6/g;
    $sqz_line =~ s/G/ 7/g;
    $sqz_line =~ s/H/ 8/g;
    $sqz_line =~ s/I/ 9/g;
    $sqz_line =~ s/a/ -1/g;
    $sqz_line =~ s/b/ -2/g;
    $sqz_line =~ s/c/ -3/g;
    $sqz_line =~ s/d/ -4/g;
    $sqz_line =~ s/e/ -5/g;
    $sqz_line =~ s/f/ -6/g;
    $sqz_line =~ s/g/ -7/g;
    $sqz_line =~ s/h/ -8/g;
    $sqz_line =~ s/i/ -9/g;
    return $sqz_line;
}

# write out the data table
#
sub writeOut($$$$$$$) {
    my ($firstx, $lastx, $npoints, $count, $xfactor, $yfactor, $lines_r) = @_;
    my @lines = @{$lines_r};
    my $sqz_line;
    my $dif_line;
    my $dup_line;
    my $yold = 0;
    my $xold = 0;
    my $ycheck = 0;
    my @values;
    my $incr = ($lastx - $firstx) / ($npoints - 1.0);

    # for (my $i = 0; $i < $count; ++$i) { print "$lines[$i]\n"; }
    for (my $i = 0; $i < $count; ++$i) { 
	if ($lines[$i] =~ /[\%JKLMNOPQRjklmnopqr]/) {
	    if ($lines[$i] =~ /[STUVWYXZs]/) {
		print STDERR "DIFDUP -> $lines[$i]\n" if defined($opts{'d'});
		$dup_line = dup_conv($lines[$i]);
		print STDERR "DIFDUP <- $dup_line\n" if defined($opts{'d'});
	    } else {
		print STDERR "DIF    $lines[$i]\n" if defined($opts{'d'});
		$dup_line = $lines[$i];
	    }
	    $sqz_line = sqz_conv($dup_line);
	    $dif_line = dif_conv($sqz_line);
	    @values = split /\ +/, $dif_line;
	    #
	    # convert differences to abs values
	    #
	    for (my $i = 2; $i <= $#values; ++$i) {
		$values[$i] += $values[$i - 1];  
	    }
	    if ($ycheck == 1) {
		print STDERR "FAILED Y-VALUE CHECK line $lnum: $xold != $values[0] or $yold != $values[1]\n" 
		    if (($xold != $values[0]) or ($yold != $values[1]));
		$values[0] += $incr;
		for (my $i = 2; $i <= $#values; ++$i) {
		    printf "%f %f\n", 
		    $xfactor*$values[0], $yfactor*$values[$i];
		    $values[0] += $incr;
		} 
		$xold = $values[0] - $incr; $yold = $values[$#values];
	    } else {
		for (my $i = 1; $i <= $#values; ++$i) {
		    printf "%f %f\n", 
		    $xfactor*$values[0], $yfactor*$values[$i];
		    $values[0] += $incr;
		}
		$xold = $values[0] - $incr; $yold = $values[$#values];
	    }
	    $ycheck = 1;
	}
	elsif ($lines[$i] =~ /[\@ABCDEFGHIabcdefghi]/) {
	    print STDERR "SQZ    $lines[$i]\n" if defined($opts{'d'});
	    $sqz_line = sqz_conv($lines[$i]);
	    @values = split /\ +/, $sqz_line;
	    for (my $i = 1; $i <= $#values; ++$i) {
		printf "%f %f\n", $xfactor*$values[0], $yfactor*$values[$i];
		$values[0] += $incr;
	    }
	    $ycheck = 0;
	} 
	else {
	    print STDERR "PAC    $lines[$i]\n" unless (!defined($opts{'d'}));
	    $lines[$i] =~ s/\-/ -/g; 
	    $lines[$i] =~ s/\+/ +/g; 
	    @values = split /\ +/, $lines[$i];
	    for (my $i = 1; $i <= $#values; ++$i) {
		printf "%f %f\n", $xfactor*$values[0], $yfactor*$values[$i];
		$values[0] += $incr;
	    }
	    $ycheck = 0;
	}
    }
}

# read the data table
#
sub readTabular($$$$$) {
    my ($firstx, $lastx, $npoints, $xfactor, $yfactor) = @_;
    my @lines = '';
    my $count = 0;
    while (<FILE>) {
	++$lnum;
	if (/^\#\#END=/) {
	    writeOut($firstx, $lastx, $npoints, $count, 
		     $xfactor, $yfactor, \@lines);
	    print STDERR "END of block reached (readTabular)\n" 
		unless (!defined($opts{'d'}));
	    return 0;
	}
	$lines[$count] = $_;
	$lines[$count] =~ s/^\s*//;
	$lines[$count] =~ s/\s*$//;
	# print "$lines[$count]\n";
	++$count;
    }
}

# Read a single BLOCK
#
sub readBlock {
    my ($title, $xunits, $yunits, $xfactor, $yfactor);
    my ($firstx, $lastx, $npoints, $dummy);

    my $format  = 0;
    my $TABULAR = 1;

    
    while (<FILE>) {
	++$lnum;
	if (/^\#\#TITLE=\s*(.*)/) {
	    $title = $1; chomp($title);
	    print STDERR "TITLE   $title\n" unless (!defined($opts{'d'}));
	    print "# TITLE   $title\n" unless (defined($opts{'q'}));
	}
	if (/^\#\#ORIGIN=\s*(.*)/) {
	    $dummy = $1; chomp($dummy);
	    print "# ORIGIN   $dummy\n" unless (defined($opts{'q'}));
	}
	if (/^\#\#OWNER=\s*(.*)/) {
	    $dummy = $1; chomp($dummy);
	    print "# OWNER    $dummy\n" unless (defined($opts{'q'}));
	}
	if (/^\#\#DATE=\s*(.*)/) {
	    $dummy = $1; chomp($dummy);
	    print "# DATE     $dummy\n" unless (defined($opts{'q'}));
	}
	if (/^\#\#TIME=\s*(.*)/) {
	    $dummy = $1; chomp($dummy);
	    print "# TIME     $dummy\n" unless (defined($opts{'q'}));
	}
	if (/^\#\#RESOLUTION=\s*(.*)/) {
	    $dummy = $1; chomp($dummy);
	    print "# RESOLUTION    $dummy\n" unless (defined($opts{'q'}));
	}
	if (/^\#\#DELTAX=\s*(.*)/) {
	    $dummy = $1; chomp($dummy);
	    print "# DELTAX  $dummy\n" unless (defined($opts{'q'}));
	}
	if (/^\#\#XUNITS=\s*(.*)/) {
	    $xunits = $1; chomp($xunits);
	    print STDERR "XUNITS  $xunits\n" unless (!defined($opts{'d'}));
	    print "# XUNITS  $xunits\n" unless (defined($opts{'q'}));
	}
	if (/^\#\#YUNITS=\s*(.*)/) {
	    $yunits = $1; chomp($yunits);
	    print STDERR "YUNITS  $yunits\n" unless (!defined($opts{'d'}));
	    print "# YUNITS  $yunits\n" unless (defined($opts{'q'}));
	}
	if (/^\#\#XFACTOR=\s*(.*)/) {
	    $xfactor = $1; chomp($xfactor);
	    print STDERR "XFACTOR $xfactor\n" unless (!defined($opts{'d'}));
	    print "# XFACTOR $xfactor\n" unless (defined($opts{'q'}));
	}
	if (/^\#\#YFACTOR=\s*(.*)/) {
	    $yfactor = $1; chomp($yfactor);
	    print STDERR "YFACTOR $yfactor\n" unless (!defined($opts{'d'}));
	    print "# YFACTOR $yfactor\n" unless (defined($opts{'q'}));
	}
	if (/^\#\#FIRSTX=\s*(.*)/) {
	    $firstx = $1; chomp($firstx);
	    print STDERR "FIRSTX  $firstx\n" unless (!defined($opts{'d'}));
	    print "# FIRSTX  $firstx\n" unless (defined($opts{'q'}));
	}
	if (/^\#\#LASTX=\s*(.*)/) {
	    $lastx = $1; chomp($lastx);
	    print STDERR "LASTX   $lastx\n" unless (!defined($opts{'d'}));
	    print "# LASTX   $lastx\n" unless (defined($opts{'q'}));
	}
	if (/^\#\#NPOINTS=\s*(.*)/) {
	    $npoints = $1; chomp($npoints);
	    print STDERR "NPOINTS $npoints\n" unless (!defined($opts{'d'}));
	    print "# NPOINTS $npoints\n" unless (defined($opts{'q'}));
	}
	if (/^\#\#XYDATA=\s*\(X\+\+\(Y\.\.Y\)\)/) {
	    $format = $TABULAR;
	    print STDERR "FORMAT  (X++(Y..Y))\n" unless (!defined($opts{'d'}));
	    if (0 == readTabular($firstx, $lastx, $npoints, 
				 $xfactor, $yfactor)) {
		return 0;
	    }	
	}
	if (/^\#\#END=/) {
	   print STDERR "END of block reached (readBlock)\n" 
	       unless (!defined($opts{'d'}));
	   return 0;
       } 
    }
    return 1; # finished
}



