use warnings;
use strict;
use Data::Dumper;

###########################################################
# Estimate how many manuscript lines and pages a plaintext
# story occupies when wrapped to a fixed line width.
#
# Usage: perl wordcount.pl FILE [-lines=N] [-chars=N] [-tabs=N] [-text=N]
#
#   -lines  lines per page used for the page estimate
#   -chars  maximum characters per wrapped line
#   -tabs   number of columns a tab character occupies
#   -text   when non-zero, also print the re-wrapped text
###########################################################

########################################
my $default_lines_per_page      = 46;
my $default_characters_per_line = 72;
my $default_tab_stop            = 3;
my $default_output_text         = 0;
########################################

# No arguments, or a first argument starting with -h: show help and stop.
if ( (scalar(@ARGV) == 0) or ($ARGV[0] =~ m{\A-h}) ) {
    print <<"HELP";

Call this script, passing in the name of a text file
containing your story in plaintext format.
This script will then read it, and generate a
publisher-friendly word count (note that this is
a different sort of word count than is reported by
normal word processors.)

Example:

    perl wordcount.pl mystory.txt

By default, the program will assume
$default_lines_per_page lines per page and
$default_characters_per_line characters per line and
$default_tab_stop characters per tab.

If you want to change any of these values, add them
on the command line after the filename. For example:

    perl wordcount.pl mystory.txt -lines=23 -chars=60 -tabs=5

If you want to change the defaults, edit the first few lines
of your copy of the program.

Copyright 2006 Greg London
This program licensed under the CreativeCommons-Attribution license.
http://creativecommons.org/licenses/by/2.5/

HELP
    exit;
}

###########################################################
# process arguments and check for errors
###########################################################
my $storyname = shift(@ARGV);

# Effective settings: defaults, overridden by -key=number arguments.
my %actual_values = (
    lines => $default_lines_per_page,
    chars => $default_characters_per_line,
    tabs  => $default_tab_stop,
    text  => $default_output_text,
);

while (scalar(@ARGV)) {
    my $arg = shift(@ARGV);

    # The pattern is anchored at both ends so that stray text around an
    # option (e.g. "-lines=5x") is rejected rather than half-accepted.
    # One or two leading dashes are allowed.
    if ($arg =~ m{\A--?(\w+)=(\d+)\z}) {
        my ($key, $val) = ($1, $2);
        unless (exists($actual_values{$key})) {
            die "Error: unknown argument '$arg'";
        }
        $actual_values{$key} = $val;
    }
    else {
        # Silently skipping a mistyped option would produce a count based
        # on settings the user did not ask for, so fail loudly instead.
        die "Error: malformed argument '$arg' (expected -name=number)";
    }
}

#print Dumper \%actual_values;

unless (defined($storyname)) {
    die "Error: please provide text filename";
}

unless (-e $storyname) {
    die "Error: could not find file '$storyname'";
}

# The page estimate divides by lines-per-page; refuse zero up front
# instead of crashing with a division-by-zero after reading the file.
unless ($actual_values{lines} > 0) {
    die "Error: lines per page must be greater than zero";
}

###########################################################
# only print out if -text=1
###########################################################
sub pprint {
###########################################################
    # Forward all arguments to print, but only when the text option
    # holds a true value.
    if ($actual_values{text}) {
        print @_;
    }
}

###########################################################
#process file
###########################################################

# Three-argument open: the filename is never interpreted as a mode or a
# pipe, and leading/trailing whitespace in the name is preserved.
open(my $in, '<', $storyname)
    or die "Error: unable to open $storyname: $!";

my $linecounter   = 0;     # running total of counted lines
my $columncounter = 0;     # width consumed on the current wrapped line
my $chunk         = '';    # most recently consumed token

# NOTE(review): the counter is bumped once for every line read from the
# file and once more for every wrap; because a newline chunk always forces
# the wrap branch below, a newline-terminated input line is counted twice.
# Confirm whether that extra line is intended before changing it.
while (my $linetext = <$in>) {
    $linecounter++;

    # Consume the line one token at a time from the front.
    while (length($linetext)) {

        # tabs: expand to the configured number of spaces
        if ($linetext =~ s{\A(\t)}{}) {
            $chunk = ' ' x $actual_values{tabs};
            $columncounter += $actual_values{tabs};
        }
        # newline: push the column past the limit to force a line break
        elsif ($linetext =~ s{\A(\n)}{}) {
            $chunk = $1;
            $columncounter = $actual_values{chars} + 10;
        }
        # whitespace can go on end of line past line.
        elsif ($linetext =~ s{\A(\s)}{}) {
            $chunk = $1;
            $columncounter += 1;
        }
        # don't split words with trailing punctuation.
        elsif ($linetext =~ s{\A(\w+\S+)}{}) {
            $chunk = $1;
            $columncounter += length($chunk);
        }
        # don't split words, but if followed by whitespace,
        # can ignore space this round.
        elsif ($linetext =~ s{\A(\w+)}{}) {
            $chunk = $1;
            $columncounter += length($chunk);
        }
        # punctuation marks must be surrounded by whitespace
        elsif ($linetext =~ s{\A([^\s\w]+)}{}) {
            $chunk = $1;
            $columncounter += length($chunk);
        }
        else {
            die "Parse Error: no match on remaining text, '$linetext'";
        }

        #print "chunk is '$chunk'\n";

        # now figure out if we're about to go past end of column
        if ($columncounter > $actual_values{chars}) {
            if ($chunk =~ m{\A\s}) {
                # if white space at end of line, don't print it.
                $columncounter = 0;
                pprint("\n");
            }
            else {
                # if non-white space at end of line, print it on next line
                pprint("\n");
                pprint($chunk);
                $columncounter = length($chunk);
            }
            $linecounter++;
        }
        else {
            # haven't reached end of line, print out plain text
            pprint($chunk);
        }
    }    # while (linetext)
}    # while (in)

close($in)
    or warn "Warning: problem closing '$storyname', I hope it's OK.";

# Page count truncated (not rounded) to one decimal place.
my $total_pages_float = $linecounter / $actual_values{lines};
my $total_pages_int   = int($total_pages_float * 10);
my $total_pages       = $total_pages_int / 10;

print "\n\n\n";
print "#" x 40;
print "\ntotal lines for this text is $linecounter\n";
print "With a lines per page of " . ($actual_values{lines}) . "\n";
print "that yields a total of $total_pages pages\n";
print "#" x 40;
print "\n\n\n";