#!/usr/bin/tclsh

#    Copyright (C) 2000 artofcode LLC.  All rights reserved.
# 
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA, 02111-1307.

# $Id: hrefcov.tcl,v 1.5.4.1.2.1 2003/04/12 14:02:39 giles Exp $

# hrefcov.tcl - check that the hrefs in a set of HTML documents mention all
# of a set of files.  The requirement is that the union of all the docfiles
# must somewhere reference all the files.
#
# USAGE holds the command-line synopsis:
set USAGE {Usage:
    hrefcov (+src | +lib | [+-]from <docfile> | [+-]to (<directory> | <file>))*
}
# +from or +to adds files; -from or -to removes them.  Each of these
# switches applies to every following argument up to the next switch, and
# an argument containing * is expanded as a glob pattern.
# +src and +lib execute SRC_LIST and LIB_LIST below.

# Ghostscript-specific parameter lists, written in the same syntax as the
# command line.

# +src: everything under lib and src must be referenced from
# doc/Develop.htm, except for the exclusions removed again below.
set SRC_LIST {}
lappend SRC_LIST +from doc/Develop.htm
lappend SRC_LIST +to lib src
lappend SRC_LIST -to lib/CVS src/CVS
lappend SRC_LIST -to src/*.mak.tcl
lappend SRC_LIST -to lib/*.upp
				# Drop all lib/*.ps, then put back the
				# gs_*.ps and pdf_*.ps files.
lappend SRC_LIST -to lib/*.ps +to lib/gs_*.ps lib/pdf_*.ps
# +lib: the PostScript files in examples and lib must be referenced from
# doc/Psfiles.htm.
set LIB_LIST {}
lappend LIB_LIST +from doc/Psfiles.htm
lappend LIB_LIST +to examples/*.ps lib/*.ps

# Global variables:
#   TO(file) is defined for every file in the "to" list
#   TO_DIR(dir) is defined for every directory in the "to" list
#   FROM(file) is defined for every file mentioned in a "from" document
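# FROM names are normalized by removing ./ and/or <dir>/../ whenever
# possible, to produce file names relative to the directory where this
# program is being run.  TO and TO_DIR names are stored exactly as given
# (or as returned by glob), so "to" arguments must be written without ./
# or ../ components in order to match.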

# Reset the internal data base: discard the global FROM, TO and TO_DIR
# arrays.  Each unset is wrapped in catch because the array may not exist
# yet (e.g. on the first call).
proc init {} {
    catch {unset ::FROM}
    catch {unset ::TO}
    catch {unset ::TO_DIR}
}

# Normalize a file name by removing one trailing /, all occurrences of ./,
# and all occurrences of <dir>/../.
#   fname - the path name to normalize
# Returns the normalized name; / if nothing is left after removing the
# trailing / and the ./ components, or . if nothing is left after removing
# the <dir>/../ components.
# A <dir> whose name begins with .. is never collapsed, so leading ../
# components are kept.  A final <dir>/.. with no / after it is also kept.
proc normalize_fname {fname} {
    set name $fname
				# Remove a trailing /
    regsub {/$} $name "" name
				# Remove occurrences of ./
    while {[regsub {^\./} $name "" name]} {}
    while {[regsub {/\./} $name / name]} {}
    while {[regsub {/\.$} $name "" name]} {}
    if {$name == ""} {return /}
				# Remove occurrences of <dir>/../
				# The dots must be escaped: unescaped, /../
				# would match any two-character directory
				# (a/bc/x would become x).
    while {[regsub {(^|/)([^./]|\.[^./])[^/]*/\.\./} $name {\1} name]} {}
    if {$name == ""} {return .}
    return $name
}

# Add or remove a file, or all the files in a directory, to/from TO.

# Add a file, or (recursively) a directory and everything glob finds in it,
# to the "to" list.
#   to - a path name; anything that is neither a regular file nor a
#        directory is ignored.
# Files are recorded in TO, directories in TO_DIR, both keyed by the path
# name exactly as given / as returned by glob.
proc add_to {to} {
    global TO TO_DIR

    if {[file isfile $to]} {
	set TO($to) 1
    } elseif {[file isdirectory $to]} {
	set TO_DIR($to) 1
	# -nocomplain: an empty directory is not an error, it simply
	# contributes no files.
	foreach f [glob -nocomplain -- $to/*] {add_to $f}
    }
}
# Remove a file, or (recursively) a directory and everything glob finds in
# it, from the "to" list.
#   to - a path name; anything that is neither a regular file nor a
#        directory is ignored.
# Entries that are not currently in TO / TO_DIR are skipped silently.
proc remove_to {to} {
    global TO TO_DIR

    if {[file isfile $to]} {
	catch {unset TO($to)}
    } elseif {[file isdirectory $to]} {
	catch {unset TO_DIR($to)}
	# -nocomplain: an empty directory is not an error, there is just
	# nothing further to remove.
	foreach f [glob -nocomplain -- $to/*] {remove_to $f}
    }
}

# Add or remove all the files mentioned in a document to/from FROM.
# Note that we only add/remove files mentioned as a whole, i.e., without #.

# Call a procedure once for every whole-file href in a document.
#   doc  - path name of the HTML document to scan
#   proc - name of a procedure taking one argument, the referenced file
# Each reference is prefixed with the directory part of doc (if any) and
# passed through normalize_fname before being handed to proc.  Matching is
# case-insensitive and line by line, so an href split across lines is not
# seen.  If doc cannot be opened, a warning is written to stderr and
# nothing is called.
proc for_from {doc proc} {
    set prefix ""
    regexp {^(.*/)[^/]+$} $doc skip prefix
    # Scan the file directly rather than through an external egrep, so that
    # a missing or noisy egrep cannot silently produce zero references.
    if {[catch {open $doc r} chan]} {
	puts stderr "hrefcov: cannot read $doc: $chan"
	return
    }
    set lines [split [read $chan] "\n"]
    close $chan
    set href_exp {href="([^"#]*)"(.*)$}
    foreach line $lines {
	# Each match consumes the line up to and including the href, so
	# the loop visits every href on the line and then stops.
	while {[regexp -nocase $href_exp $line skip ref line]} {
	    $proc [normalize_fname $prefix$ref]
	}
    }
}
# Record every file referenced by doc in FROM.
proc add_from {doc} {
    for_from $doc add1_from
}

# Record a single (already normalized) file name in FROM.
proc add1_from {from} {
    set ::FROM($from) 1
}

# Forget every file referenced by doc.
proc remove_from {doc} {
    for_from $doc remove1_from
}

# Forget a single file name; a name that is not in FROM is ignored.
proc remove1_from {from} {
    catch {unset ::FROM($from)}
}

# Main program.

# Process a list of command-line style arguments.
#   arglist - list of switches and file arguments (see USAGE)
# +src and +lib process SRC_LIST and LIB_LIST through a recursive call;
# the recursion has its own state, so it does not change which operation
# is currently selected here.  +from, -from, +to and -to select the
# procedure applied to each following file argument.  A file argument
# containing * is expanded with glob (matching nothing is not an error);
# any other file argument is passed on unchanged.
# An unknown switch, or a file argument that is not preceded by one of
# +from/-from/+to/-to, prints a message and the usage text on stderr and
# exits with status 1.
proc main_args {arglist} {
    global FROM TO SRC_LIST LIB_LIST USAGE

    # No operation is selected until the first +from/-from/+to/-to.
    set do ""
    foreach arg $arglist {
	switch -glob -- $arg {
	    +src {main_args $SRC_LIST}
	    +lib {main_args $LIB_LIST}
	    +from {set do add_from}
	    -from {set do remove_from}
	    +to {set do add_to}
	    -to {set do remove_to}
	    {[+-]*} {
		puts stderr "Unknown switch: $arg"
		if {[info exists USAGE]} {puts -nonewline stderr $USAGE}
		exit 1
	    }
	    default {
		if {$do == ""} {
		    # Without this check, $do would be read before being
		    # set and Tcl would abort with "no such variable".
		    puts stderr "No +from, -from, +to or -to before: $arg"
		    if {[info exists USAGE]} {puts -nonewline stderr $USAGE}
		    exit 1
		}
		if {[regexp {[*]} $arg]} {
		    foreach a [glob -nocomplain $arg] {$do $a}
		} else {
		    $do $arg
		}
	    }
	}
    }
}
# Run the whole check and print the report on stdout.
#   argv - the command-line argument list (see USAGE)
# Two lists are printed, each sorted and each omitted if empty:
#   - files in TO that no "from" document references;
#   - files referenced by a "from" document that are not in TO, restricted
#     to those whose directory part is one of the scanned directories
#     (TO_DIR), so that references to unrelated places are not reported.
proc main {argv} {
    global FROM TO TO_DIR

    init
    main_args $argv
    set list {}
    foreach f [array names TO] {
	if {![info exists FROM($f)]} {lappend list $f}
    }
    if {$list != {}} {
	puts "        ****** Files defined but not referenced ******"
	foreach f [lsort $list] {puts $f}
    }
    set list {}
    foreach f [array names FROM] {
	if {![info exists TO($f)]} {
	    # Only report files that should be in a scanned directory.
	    # Look the directory part up in TO_DIR directly: splicing the
	    # directory names into a regular expression would let lib/
	    # match the tail of doc/mylib/, and let regexp metacharacters
	    # in a directory name match unrelated paths.
	    if {[regexp {^(.*)/[^/]+$} $f skip dir] \
		    && [info exists TO_DIR($dir)]} {
		lappend list $f
	    }
	}
    }
    if {$list != {}} {
	puts "        ****** Files referenced but not defined ******"
	foreach f [lsort $list] {puts $f}
    }
}

# Run the check on the command-line arguments.
main $argv
