#!/usr/bin/perl
#
# ltzip - Lossy Text Compression. Perl, Unix.
#
# This program compresses text files using a unique lossy text
#  compression algorithm. Decompression is not possible.
#  The result file has a ".ltz" extension, and the original file
#  remains.
# 
# Not only is the byte count reduced, but the bytes themselves are
#  smaller bytes - and weigh less when stored on disk.
#
# 24-Apr-2005	ver 0.70
#
# USAGE: ltzip [-v] textfile
#    eg,
#        ltzip -v unimportant.txt
#
#
# SEE ALSO: cat filename > /dev/null
#
# THANKS: Peter Van Schaik
#
# COPYRIGHT: Copyright (c) 2005 Brendan Gregg.
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software Foundation,
#  Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
#  (http://www.gnu.org/copyleft/gpl.html)
#
# 24-Apr-2005	Brendan Gregg	Created this.


#
#  Process Command Line Arguments
#
use Getopt::Std;
# Parse the single supported switch (-v) and take the first remaining
# argument as the file to compress.  A bad switch or a missing/empty
# file name prints the usage message and exits.
usage() unless getopts('v');
$verbose  = $opt_v;
$pathname = $ARGV[0];
usage() if $pathname eq "";


#
#  Read Text File
#
# Read every line of the input file into @Raw.
#
# The three-argument form of open is used so that $pathname is always
# treated as a literal file name.  With the two-argument form, a name
# beginning with ">" or ending with "|" would be interpreted as an open
# mode (truncate a file, run a command) instead of being read.
open(my $text_fh, '<', $pathname) || die("ERROR1: Can't read $pathname: $!\n");
@Raw = <$text_fh>;
close($text_fh);

#
#  Create Compressed File
#
# The output goes next to the input, with ".ltz" appended to its name.
$outfile = "$pathname.ltz";

# Refuse to overwrite an existing file.
# NOTE: this test and the open below are two separate steps, so a file
# created by someone else in between would still be overwritten.
if (-e $outfile) {
	print STDERR "ERROR2: Output file $outfile already exists. Exiting.\n";
	exit(2);
}

# Three-argument open: $outfile is taken literally, so a name starting
# with ">" cannot turn the mode into append, and surrounding whitespace
# in the name is preserved (matching what the -e test above checked).
open(my $out_fh, '>', $outfile) || die("ERROR3: Can't creat $outfile: $!\n");
foreach my $line (@Raw) {
	### Compress, then write one output line per input line.
	### ltcompress() removes the original newline, so add one back.
	print $out_fh ltcompress($line), "\n";
}

# Buffered write errors (e.g. disk full) are only reported at close time,
# so the result of close must be checked.
close($out_fh) || die("ERROR4: Can't write $outfile: $!\n");

# With -v, print the sizes of the input and output files and the
# percentage by which the output is smaller than the input.
if ($verbose) {
	$sizeold = -s $pathname;
	$sizenew = -s $outfile;

	# An empty input is given a ratio of 1 (reported as 0.00% reduced)
	# so that we never divide by zero.
	$ratio = $sizeold == 0 ? 1 : $sizenew / $sizeold;

	printf("Read   : %-40s %8d bytes\n", $pathname, $sizeold);
	printf("Created: %-40s %8d bytes\n", $outfile, $sizenew);
	printf("Reduced: %.2f%%\n", 100 - $ratio * 100);
}

#
#  Subroutines
#
# ltcompress - apply the lossy transformation to one string.
#
# Takes a single string and returns a modified copy of it:
#   - all whitespace (including any trailing newline) is removed,
#   - A-Z are replaced by a-z,
#   - the characters  - : ; "  are replaced by  _ . , '  respectively,
#   - every "0" character is removed.
# The caller's variable is left untouched.
#
sub ltcompress {
	my ($text) = @_;

	# Whitespace is stripped with a regex so that \s keeps its usual
	# meaning.
	$text =~ s/\s+//g;

	# One pass handles the rest: the search list has one more character
	# ("0") than the replacement list, and with /d such unmatched
	# characters are deleted rather than translated.
	$text =~ tr/A-Z\-:;"0/a-z_.,'/d;

	return $text;
}

# usage - print a short usage summary on STDERR and exit with status 1.
#
sub usage {
	print STDERR
	    "USAGE: ltzip [-v] textfile\n",
	    "   eg,\n",
	    "       ltzip unimportant.txt\n";
	exit(1);
}
