#!/usr/bin/perl -w

###
### wiki-graph.cgi:  a program to graph connections in PmWikis
###	Author: Bryant Durrell (durrell@innocence.com)
###	Version: 1.0 (8/25/2003)
###	Copyright: BSD, see end of file
###
###	Uses GraphViz to display, for a given wiki node:
###
###	* The start node
###	* All nodes which link to the start node
###	* All nodes which the start node links to (second order nodes)
###	* All links from the start node to second order nodes
###	* All nodes which the second order nodes link to (third order nodes)
###	* All links from second order nodes
###
###	If verbose mode is on, it also shows:
###
###	* All links from third order nodes to second and third order nodes

###
### use block:
### 	CGI and LWP are pretty standard; GraphViz and HTML::Tokeparser are not

use strict;
use GraphViz;
use CGI;
use LWP::Simple;
use HTML::TokeParser;

###
### parameters block:
### 	base: the base URL for the wiki
###	start: the page from which the map is generated
###	debug: if on, shows the spidering process rather than the map
###	verbose: if on, shows third-order node links

my $query = new CGI;
my $params = $query->Vars;
if (! (exists($params->{'base'}) && exists($params->{'start'}))) {
	show_error("Parameters 'base' and 'start' must exist.");
	exit;
}

my $base = $params->{'base'};
my $start = $params->{'start'};
my $sstart = sanitize($start);
my $debug = (exists($params->{'debug'}) ? $params->{'debug'} : 0);
my $verbose = (exists($params->{'verbose'}) ? $params->{'verbose'} : 0);

###
### node defaults
### 	change these to change the look of each node

my $shape = "box";
my $font = "luxisr";
my $size = "10";

print "Content-type: text/plain\n\n" if ($debug);

###
### establish the first node
###	you may need/want to change the font

my $g = GraphViz->new(
	concentrate => 1, 
	node => {
		shape => $shape,
		fontsize => $size,
		fontname => $font,
		}
	);
$g->add_node($sstart, style => 'bold');

### 
### find links to the starting node, then add 'em
###	hard-coded for PmWiki -- uses action=search

my @inpages = links($base . "Search?action=search&text=" . $start);
print "Inlinks: $sstart\n" if ($debug);
foreach my $in (@inpages) {
	$in = sanitize($in);
	$g->add_node($in);
	print "\tAdding node $in\n" if ($debug);
	$g->add_edge($in => $sstart) unless ($in eq $sstart);
	print "\tAdding edge $in -> $sstart\n" if ($debug);
}

###
### find links from the starting node, then add 'em
###   	set up necessary additional vars if we're being verbose
###	@allpages contains all nodes we care about
###	%out2 is a hash of references to arrays

my @outpages = links($base . $start);

my @allpages = (@inpages, @outpages) if ($verbose);
my %out2 if ($verbose);

foreach my $out (@outpages) {

	###
	### find second-order nodes, add 'em

	print "\nOutlinks: $sstart\n" if ($debug);
	my @out2 = links($base . $out);
	push @allpages, @out2;
	$out = sanitize($out);
	$g->add_node($out);
	print "\tAdding node $out\n" if ($debug);
	$g->add_edge($sstart => $out) unless ($sstart eq $out);
	print "\tAdding edge $sstart -> $out\n" if ($debug);
	foreach my $out2 (@out2) {

		###
		### find third-order nodes, add 'em
		###	and get links from them if we're verbose

		print "\tOutlinks: $out\n" if ($debug);
		my @out3 = links($base . $out2) if ($verbose);
		$out2{$out2} = \@out3 if ($verbose);
		$out2 = sanitize($out2);
		if (! grep($out2 eq $_, (@outpages, $sstart))) {

			###
			### Drop ", style => 'dashed'" if you don't like
			###	dashes

			$g->add_node($out2, style => 'dashed');
			print "\t\tAdding node $out2\n" if ($debug);
		}
		$g->add_edge($out => $out2) unless ($out eq $out2);
		print "\t\tAdding edge $out -> $out2\n" if ($debug);
	}
}

###
### if we're verbose, get links from third-order nodes back to known nodes

if ($verbose) {

	###
	### strip duplicates from @allpages
	###	not essential, but saves some cycles

	my $prev = "nonesuch";
	my @all = grep($_ ne $prev && ($prev = $_), sort(@allpages)); 

	###
	### check lists of outgoing links from %out2, add links to known nodes

	foreach my $out2 (keys %out2) {
		print "\nOutlinks: $out2\n" if ($debug);
		foreach my $out3 (@{ $out2{$out2} }) {
			print "\tChecking: $out3\n";
			if (grep($out3 eq $_, @all)) {
				$g->add_edge($out2 => $out3) 
					unless ($out2 eq $out3);
				print "\t\tAdding edge $out2 -> $out3\n" 
					if ($debug);
			}
		}
	}
}

###
### Dump the PNG if not debugging

if (! $debug) {
	print "Content-type: image/png\n\n";
	$g->as_png(\*STDOUT);
}

###
### function links:
###	takes: page to spider for links
###	returns: list of links for that page
###
###	screens out Category* and RecentChanges to keep graphs tidy

sub links {
	my @pages;
	my $target = shift;
	my $text = get($target);
	my $p = HTML::TokeParser->new(\$text);

	while (my $tag = $p->get_tag("a")) {
		if ($tag->[1]{href} =~ /$base([-\w]+)$/) {
			my $href = $1;
			if ($href ne $target && 
			    $href !~ /^Category/ && 
			    $href ne "RecentChanges") 
			{
				push @pages, $href;
			}
		}
	}	
	return @pages;
}

###
### function sanitize:
###	takes: string
### 	returns: string that GraphViz accepts as a node name
###
###	PmWiki allows page names that don't work for GraphViz node names

sub sanitize {
	my $sane = shift;
	$sane =~ s/-(.)/\u$1/g;
	return $sane;
}

###
### function show_error:
###	takes: string
###	returns: nothing
###
###	Spits out an HTML error page of low quality

sub show_error {
	my $error = shift;
	print "Content-type: text/html\n\n";
	print "<html><head><title>Error</title></head>\n";
	print "<body>", $error, "</body>\n";
	print "</html>\n";
}

### Copyright (c) 2003, Bryant Durrell
### All rights reserved.
### 
### Redistribution and use in source and binary forms, with or
### without modification, are permitted provided that the
### following conditions are met:
### 
### Redistributions of source code must retain the above
### copyright notice, this list of conditions and the following
### disclaimer.  Redistributions in binary form must reproduce
### the above copyright notice, this list of conditions and the
### following disclaimer in the documentation and/or other
### materials provided with the distribution.  Neither the name
### of Innocence Consulting nor the names of its contributors may
### be used to endorse or promote products derived from this
### software without specific prior written permission.  THIS
### SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
### CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
### INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
### MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
### DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
### CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
### SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
### NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
### LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
### HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
### CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
### OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
### SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
