#!/usr/local/bin/perl 'di'; 'ig00'; ;# ;# cdif: word context diff ;# ;# Copyright (c) 1992-1993 Kazumasa Utashiro ;# Software Research Associates, Inc. ;# 1-1-1 Hirakawa-cho, Chiyoda-ku, Tokyo 102, Japan ;# ;; $rcsid = q$Id: cdif,v 1.10 1993/06/15 09:32:08 utashiro Exp $; ;# Original: 1992/03/11 ;# ;; ($myname = $0) =~ s,.*/,,; ;; $usage = <<_; Usage: $myname [-B] [-v] [-n] [-A #] [-C #] [-D #] [-I #] [-[bwcu]] file1 file2 $myname [-rcs] [-q] [-rrev1 [-rrev2]] [$myname options] file $myname [$myname options] [diff-output-file] Options: -B byte compare -v use video standout (default for tty) -n use nroff style overstrike (default for non-tty) -b ignore trailing blank -w ignore whitespace -c[#] context diff -u[#] unified diff (if diff has -u option) -A, -C, -D (Append, Change, Delete) takes one of vso: video standout vul: video underline vbd: video bold bd: nroff style overstrike ul: nroff style underline or any sequence or sequences separated by comma -I specify string to be shown on insertion point Following strings have special meanings. vbar: print vertical bar at the point caret: print caret under the point -diff=command specify any diff command _ if ($ENV{'CDIFOPTS'}) { require('shellwords.pl'); unshift(@ARGV, &shellwords($ENV{'CDIFOPTS'})); }; $opts = 'A:C:D:I:Bcubdvqnt:'.'ibw'; while ($_ = $ARGV[0], /^-.+/ && shift) { next unless ($car, $cdr) = /^-?(.)(.*)/; if (/-rcs$/) { $rcs++; next; } if (/-diff=(.+)$/) { $diff = $1; next; } if ($car eq 'h') { &usage; } if ($car eq 'H') { exec "nroff -man $0|".($ENV{'PAGER'}||'more')." -s"; } if ($car eq 'r') { &usage("$_: Too many revisions\n\n") if @rcs == 2; $rcs++; push(@rcs, $_); next; } if (s/^-?([cu]\d+)//) { push(@diffopts, "-$1"); eval "\$opt_$car++"; redo; } push(@diffopts, "-$car") if index("iwcbu", $car) >= $[; push(@sub_diffopts, "-$car") if index("iw", $car) >= $[; if (/-(i[bxu])$/) { eval "\$opt_$1 = 1;"; next; } if (index($opts, "$car:") >= $[) { eval "\$opt_$car = length(\$cdr) ? \$cdr : \@ARGV ? shift : &usage"; next; } if (index($opts, $car) >= $[) { eval "\$opt_$car++"; $_ = $cdr; redo; } &usage("Unknown option: $car\n"); } $diff = $rcs ? 'rcsdiff' : 'diff' unless $diff; sub usage { select(STDERR); print @_, $usage; print "$rcsid\n" if $rcsid =~ /:/; exit; } if ($rcs) { push(@rcsopt, '-q') if $opt_q; $rcsfile = shift || &usage("No RCS filename\n\n"); $DIFF = "$diff @diffopts @rcsopt @rcs $dopts $rcsfile|"; } elsif (@ARGV == 2) { ($OLD, $NEW) = splice(@ARGV, 0, 2); $DIFF = "$diff @diffopts $OLD $NEW |"; } elsif (@ARGV < 2) { $DIFF = shift || '-'; } else { &usage("Arguments error.\n\n") if @ARGV; } print "DIFF=\"$DIFF\"\n" if $opt_d; %func = ('bd', *bd, 'ul', *ul, 'vso', *vso, 'vul', *vul, 'vbd', *vbd); ($A, $C, $D) = (!$opt_n && ($opt_v || -t STDOUT)) ? ('vso', 'vbd', 'vso') : ('bd', 'ul', 'bd'); for ('A', 'C', 'D') { eval "\$opt = \$opt_$_ || \$$_"; if ($func{$opt}) { eval "*$_ = \$func{$opt}"; &termcap if ($opt =~ /^v/) && !%TC; } else { $S = $S ? ++$S : 'S001'; ($start, $end) = $opt =~ /,/ ? split(/,/, $opt) : ($opt, $opt); $start =~ s/\W/\\$&/g; $end =~ s/\W/\\$&/g; eval "sub $S {join('', \"$start\", \@_, \"$end\");} *$_ = *$S;\n"; } } if ($opt_B) { $pat = '[\200-\377]?[\000-\377]'; } else { $pat = '[\200-\377].|\w+|[ \t\r\f]*\n|\s+|[\000-\377]'; } @ul[1,2] = ("_\010", "__\010\010"); @bs[1,2] = ("\010", "\010\010"); if (defined($opt_I)) { if ($opt_I =~ /^vbar$/i) { # special string 'vbar' $opt_ix = 1; $opt_ib = $opt_I =~ /[A-Z]/; $opt_I = '|'; } if ($opt_I =~ /^caret$/i) { # special string 'caret' $opt_ix = $opt_iu = 1; $opt_ib = $opt_I =~ /[A-Z]/; $opt_I = '^'; } $opt_I = &bd($opt_I) if $opt_ib; ($hcr, $hcf, $hlr, $hlf) = ("\0336", "\0337", "\0338", "\0339"); $opt_I = $hcr . $opt_I . $hcr if $opt_ix; $opt_I = $hlf . $opt_I . $hlr if $opt_iu; } $tmp = $opt_t || '/tmp'; $T1 = "$tmp/cdif_" . ($opt_d >= 2 ? $< : $$) . '_1'; $T2 = "$tmp/cdif_" . ($opt_d >= 2 ? $< : $$) . '_2'; @SIG{'INT', 'HUP', 'QUIT', 'TERM'} = ('cleanup') x 4; $* = 1; open DIFF || die "$diff: $!\n"; while () { print; if (($left, $right) = /^([\d,]+)c([\d,]+)$/) { $old = &read(DIFF, scalar(&range($left))); $del = &read(DIFF, 1); $new = &read(DIFF, scalar(&range($right))); $old =~ s/^< //g; $new =~ s/^> //g; ($old, $new) = &context($old, $new); $old =~ s/^/< /g; $new =~ s/^/> /g; print $old, $del, $new; } elsif (($left) = /^\*\*\* ([\d,]+) \*\*\*\*$/) { @old = &read_diffc(DIFF, scalar(&range($left))); print(@old), next if @old < 2; $new = $_ = ; @new = (); if (($right) = /^--- ([\d,]+) ----$/) { @new = &read_diffc(DIFF, scalar(&range($right))); for ($i = $[ + 1; $i <= $#old; $i += 2) { $old[$i] =~ s/^! //g; $new[$i] =~ s/^! //g; ($old[$i], $new[$i]) = &context($old[$i], $new[$i]); $old[$i] =~ s/^/! /g; $new[$i] =~ s/^/! /g; } } print @old, $new, @new; } elsif (/^\@\@ -\d+(,(\d+))? \+\d+(,(\d+))? \@\@$/) { @buf = &read_diffu(DIFF, $2 || 1, $4 || 1); while (($same, $old, $new) = splice(@buf, 0, 3)) { if ($old && $new) { $old =~ s/^\-//g; $new =~ s/^\+//g; ($old, $new) = &context($old, $new); $old =~ s/^/-/g; $new =~ s/^/+/g; } print $same, $old, $new; } } } close DIFF; exit $? >> 8; ###################################################################### sub context { local($old, $new, $_) = @_; local(@control); @c{'a', 'd', 'c'} = (0, 0, 0); @owlist = &maketmp($T1, $old); @nwlist = &maketmp($T2, $new); open(CDIF, "diff @sub_diffopts $T1 $T2 |") || die "diff: $!\n"; /[\d,]+([adc])[\d,]+/ && push(@control, $_) && $c{$1}++ while ; close CDIF; &cleanup() if $opt_d < 2; if ($opt_d >= 1) { printf "old=%d new=%d control=%d\n", @owlist+0, @nwlist+0, @control+0; printf "append=$c{'a'} delete=$c{'d'} change=$c{'c'}\n"; } &makebuf; die "$myname: illegal status of subprocess\n" if ($?>>8) > 1; return ($obuf, $nbuf); } sub cleanup { unlink($T1), unlink($T2) if $opt_d < 2; warn("\nSignal @_\n"), exit 2 if @_; } sub maketmp { local($tmp, $_, @words) = @_; local(@notspace) = (0); open(TMP, ">$tmp") || warn "$tmp: $!\n", return 0; foreach (/$pat/go) { if ($opt_w) { push(@notspace, !/\s/); if (shift(@notspace) && $notspace[0]) { push(@words, ''); print TMP "\n"; } } if (s/^(\s*)\n/\n/ && (length($1) || $opt_b || $opt_w)) { # ^ This have to be *. Don't change to +. print TMP ($opt_b || $opt_w) ? "\n" : "$1\n"; push(@words, $1); } push(@words, $_); print TMP $_; print TMP "\n" unless /\n$/; } close TMP; if ($opt_d && @words != &wc_l($tmp)) { die "Error! (\@words != `wc -l $tmp`)\n"; } @words; } sub makebuf { local($_, $old, $ctrl, $new, $o1, $o2, $n1, $n2); local($o, $n); $obuf = $nbuf = ''; for (@control) { ($old, $ctrl, $new) = /([\d,]+)([adc])([\d,]+)/; ($o1, $o2) = &range($old); $o1--; $o2--; ($n1, $n2) = &range($new); $n1--; $n2--; $obuf .= join('', @owlist[$o .. $o1 - 1]), $o = $o1 if $o < $o1; $nbuf .= join('', @nwlist[$n .. $n1 - 1]), $n = $n1 if $n < $n1; if ($ctrl eq 'd') { $obuf .= &D(@owlist[$o1 .. $o2]); $o = $o2 + 1; $nbuf .= $nwlist[$n++] . $opt_I if $opt_I; } elsif ($ctrl eq 'c') { $obuf .= &C(@owlist[$o1 .. $o2]); $nbuf .= &C(@nwlist[$n1 .. $n2]); $o = $o2 + 1; $n = $n2 + 1; } elsif ($ctrl eq 'a') { $nbuf .= &A(@nwlist[$n1 .. $n2]); $n = $n2 + 1; $obuf .= $owlist[$o++] . $opt_I if $opt_I; } } $obuf .= join('', @owlist[$o .. $#owlist]); $nbuf .= join('', @nwlist[$n .. $#nwlist]); } sub read_diffc { local($FH, $n, $i, @buf, $_) = @_; for ($i = 0; $n-- && ($_ = <$FH>); $buf[$i] .= $_) { $i++ if ($i % 2) != /^!/; } @buf; } sub read_diffu { local($FH, @l[1,2,0]) = @_; local($i, @buf, $slot); %slot = (' ', 0, '-', 1, '+', 2) unless %slot; for (; 2 * $l[0] + $l[1] + $l[2] > 0 && ($_ = <$FH>); $buf[$i] .= $_) { $i++ while ($i % 3) != ($slot = $slot{substr($_, 0, 1)}); $l[$slot]--; } @buf; } sub vso { join('', $so, @_, $se); } sub vul { join('', $us, @_, $ue); } sub vbd { join('', $md, @_, $me); } sub ul { join('', grep(s/[\200-\377]?./$ul[length($&)].$&/ge || 1, @_)); } sub bd { join('', grep(s/[\200-\377]?./$&.$bs[length($&)].$&/ge || 1, @_)); } sub range { local($_, $from, $to) = @_; ($from, $to) = /,/ ? split(/,/) : ($_, $_); wantarray ? ($from, $to) : $to - $from + 1; } sub read { local($FH, $c, @buf) = (@_); push(@buf, scalar(<$FH>)) while $c--; wantarray ? @buf : join('', @buf); } sub termcap { do 'ioctl.ph' || do 'sys/ioctl.ph'; require('termcap.pl'); $ospeed = 1 unless $ospeed; &Tgetent; $so = &Tputs($TC{'so'}); $se = &Tputs($TC{'se'}); $us = &Tputs($TC{'us'}); $ue = &Tputs($TC{'ue'}); $md = &Tputs($TC{'md'}) || $so; $me = &Tputs($TC{'me'}) || $se; } sub wc_l { local($file) = shift; local(*FILE, $line); open(FILE, $file) || die "$file: $!\n"; $line++ while ; close FILE; $line; } ###################################################################### .00; # finish .ig 'di \" finish diversion--previous line must be blank .nr nl 0-1 \" fake up transition to first page again .nr % 0 \" start at page 1 '; __END__ ############# From here on it's a standard manual page #### .de XX .ds XX \\$4\ (v\\$3) .. .XX $Id: cdif,v 1.10 1993/06/15 09:32:08 utashiro Exp $ .TH CDIF 1 \*(XX .SH NAME cdif \- word context diff .SH SYNOPSIS .nr ww \w'\fBcdif\fP\ ' .in +\n(wwu .ta \n(wwu .ti -\n(wwu \fBcdif\fP \c [-B] [-v] [-n] [-A #] [-C #] [-D #] [-[bwcu]] \fIfile1\fP \fIfile2\fP .br .ti -\n(wwu \fBcdif\fP \c [-rcs] [-q] [-r\fIrev1\fP [-r\fIrev2\fP]] [\fIcdif options\fP] \fIfile\fP .br .ti -\n(wwu \fBcdif\fP \c [\fIcdif options\fP] [\fIdiff-output-file\fP] .SH DESCRIPTION .I Cdif is a post-processor of the Unix \fIdiff\fP command. It highlights deleted, changed and added words based on word context. Highlighting is usually done by using different style font and the output looks like this: .ne 6 .nf 1c1 < highlights \fBdeleted\fP and \fIchanged\fP and words --- > highlights and \fImodified\fP and \fBadded\fP words .fi .PP Appended and deleted lines are not effected at all. .PP You may want to compare character-by-character rather than word-by-word. Option \-B option can be used for that purpose and the output looks like: .ne 6 .nf 1c1 < $opts = '\fBcu\fPbdvqnB\fIA\fP:\fIC\fP:\fID\fP:'; --- > $opts = 'bdvqn\fBxy\fPB\fIP\fP:\fIQ\fP:\fIR\fP:'; .fi .PP If the standard output is a terminal, \fIcdif\fP uses termcap to highlights words. It uses \fImd\fP and \fIme\fP (bold or bright) sequence for changed words and \fIso\fP and \fIse\fP (standout) sequence for deleted and added part. .PP If the standard output is not a terminal, it uses nroff style overstriking to highlight them. You won't be able to tell the difference on normal screen but using pager command like \fImore\fP or \fIless\fP, or printing to appropriate printer will make them visible. .PP If only one file is specified, \fIcdif\fP reads that file (stdin if no file) as a output from diff command. This is convenient because you don't have to retype the command line. \fICdif\fP is often used in following context. .ne 5 .nf % rcsdiff foo.c (I can't see any difference...) % !! | cdif .fi Lines those don't look like diff output are simply ignored and printed same as input. So you can use \fIcdif\fP from \fIrn\fP(1) like `|cdif'. .SH OPTIONS .IP \-B Compare the data character-by-character context. .IP \-v Force to use video effect even if the output is not a terminal. .IP \-n Force not to use video effect even if the output is a terminal. .IP "\-[ACD] \fIeffect\fP" Specify the effect to use for added, changed and deleted words respectively. Special effects are: .ne 7 .nf vso: video standout vul: video underline vbd: video bold bd: nroff style overstrike ul: nroff style underline .fi .IP If specified effect doesn't match to any of special words, the sequence is used to highlighting string. Start and end string are separated by comma. So command: .ne 3 .nf cdif -D'<,>' -C'[,]' -A'{,}' .fi produces following output. .ne 6 .nf 1c1 < highlights and [changed] and words --- > highlights and [modified] and{ added} words .fi .IP "\-I \fIstring\fP" Specify the string to be shown at the inserted or deleted point. Normally the point at where new text is inserted or old text is deleted is not indicated in the output. \-I option specifies the string which is to be shown at the insertion point. For example, command .ne 3 .nf cdif -D'<,>' -C'[,]' -A'{,}' -I'|' .fi produces the output like below. .ne 6 .nf 1c1 < highlights and [changed] and| words --- > highlights |and [modified] and{ added} words .fi Following four options are used to specify how this string is printed in the output. .RS .IP \-ib Makes the string overstruck. .IP \-iu Put the string under the line. According to the sequence of Teletype model 37, the string is enclosed by ESC-9 and ESC-8 sequences. .IP \-ix Put the string at the exact point. The string is enclosed by non-standard ESC-6 sequences which indicate to move the printing point half-character backward. Currently this sequence is understood only by my perl version of a2ps. See RELATED COMMANDS. .RE .IP "" Special strings \fIvbar\fP, \fIVBAR\fP, \fIcaret\fP, \fICARET\fP are prepared for shortcut. `\fIvbar\fP' means putting the vertical bar (|) at the exact insertion point. `\fIcaret\fP' means putting the caret mark (^) under the exact insertion point. Uppercase character makes them overstruck. .IP "\-diff=\fIcommand\fP" Specify the diff command to use. .IP "\-rcs, \-r\fIrev\fP" Use rcsdiff instead of normal diff. Option \-rcs is not required when \-r\fIrev\fP is supplied. .IP "\-b, \-w, \-c, \-u" Passed through to the back-end diff command. \fICdif\fP can process the output from context diff (\-c) and unified diff (\-u) if those are available. .SH RELATED COMMANDS .IP a2ps is a command which converts ascii text to postscript program. C version was originally posted to usenet by miguel@imag.imag.fr (Miguel Santana). I reimplemented it by perl and enhanced to use different font family for overstruck and underlined characters. This is a convenient tool to print the output from cdif to postscript printer. .IP sdif is a System V's sdiff clone written in perl. It also can handle overstruck and underlined character, and has \-cdif option to use \fIcdif\fP instead of normal diff. .IP termfix is a program to change termcap capability of current terminal temporarily. Recent more and less uses \fImd\fP and \fIme\fP capability to display overstruck character if available. Sometimes we want to use standout rather than bold. Termfix allows to cancel \fImd\fP and \fImd\fP capability only for the invoking command by ``md@'' option. .PP All of these programs can be ftp'ed from sra.co.jp (133.137.4.3) by anonymous ftp from the directory ~ftp/pub/lang/perl/sra-scripts. .SH AUTHOR .nf Kazumasa Utashiro Software Research Associates, Inc. 1-1-1 Hirakawa-cho, Chiyoda-ku, Tokyo 102, Japan .fi .SH "SEE ALSO" perl(1), diff(1), sdif(1), a2ps(1), termfix(1) .SH BUGS .PP When the last one word (the last byte with \-B option) of a line moved to the other line, it is treated as disappeared and the new word was appeared in the other line. This is due to the behavior of diff command. .PP \fICdif\fP is naturally slow because it uses normal diff command as a back-end processor to compare words. .ex