#!/usr/bin/perl -w ### ### wiki-graph.cgi: a program to graph connections in PmWikis ### Author: Bryant Durrell (durrell@innocence.com) ### Version: 1.0 (8/25/2003) ### Copyright: BSD, see end of file ### ### Uses GraphViz to display, for a given wiki node: ### ### * The start node ### * All nodes which link to the start node ### * All nodes which the start node links to (second order nodes) ### * All links from the start node to second order nodes ### * All nodes which the second order nodes link to (third order nodes) ### * All links from second order nodes ### ### If verbose mode is on, it also shows: ### ### * All links from third order nodes to second and third order nodes ### ### use block: ### CGI and LWP are pretty standard; GraphViz and HTML::Tokeparser are not use strict; use GraphViz; use CGI; use LWP::Simple; use HTML::TokeParser; ### ### parameters block: ### base: the base URL for the wiki ### start: the page from which the map is generated ### debug: if on, shows the spidering process rather than the map ### verbose: if on, shows third-order node links my $query = new CGI; my $params = $query->Vars; if (! (exists($params->{'base'}) && exists($params->{'start'}))) { show_error("Parameters 'base' and 'start' must exist."); exit; } my $base = $params->{'base'}; my $start = $params->{'start'}; my $sstart = sanitize($start); my $debug = (exists($params->{'debug'}) ? $params->{'debug'} : 0); my $verbose = (exists($params->{'verbose'}) ? $params->{'verbose'} : 0); ### ### node defaults ### change these to change the look of each node my $shape = "box"; my $font = "luxisr"; my $size = "10"; print "Content-type: text/plain\n\n" if ($debug); ### ### establish the first node ### you may need/want to change the font my $g = GraphViz->new( concentrate => 1, node => { shape => $shape, fontsize => $size, fontname => $font, } ); $g->add_node($sstart, style => 'bold'); ### ### find links to the starting node, then add 'em ### hard-coded for PmWiki -- uses action=search my @inpages = links($base . "Search?action=search&text=" . $start); print "Inlinks: $sstart\n" if ($debug); foreach my $in (@inpages) { $in = sanitize($in); $g->add_node($in); print "\tAdding node $in\n" if ($debug); $g->add_edge($in => $sstart) unless ($in eq $sstart); print "\tAdding edge $in -> $sstart\n" if ($debug); } ### ### find links from the starting node, then add 'em ### set up necessary additional vars if we're being verbose ### @allpages contains all nodes we care about ### %out2 is a hash of references to arrays my @outpages = links($base . $start); my @allpages = (@inpages, @outpages) if ($verbose); my %out2 if ($verbose); foreach my $out (@outpages) { ### ### find second-order nodes, add 'em print "\nOutlinks: $sstart\n" if ($debug); my @out2 = links($base . $out); push @allpages, @out2; $out = sanitize($out); $g->add_node($out); print "\tAdding node $out\n" if ($debug); $g->add_edge($sstart => $out) unless ($sstart eq $out); print "\tAdding edge $sstart -> $out\n" if ($debug); foreach my $out2 (@out2) { ### ### find third-order nodes, add 'em ### and get links from them if we're verbose print "\tOutlinks: $out\n" if ($debug); my @out3 = links($base . $out2) if ($verbose); $out2{$out2} = \@out3 if ($verbose); $out2 = sanitize($out2); if (! grep($out2 eq $_, (@outpages, $sstart))) { ### ### Drop ", style => 'dashed'" if you don't like ### dashes $g->add_node($out2, style => 'dashed'); print "\t\tAdding node $out2\n" if ($debug); } $g->add_edge($out => $out2) unless ($out eq $out2); print "\t\tAdding edge $out -> $out2\n" if ($debug); } } ### ### if we're verbose, get links from third-order nodes back to known nodes if ($verbose) { ### ### strip duplicates from @allpages ### not essential, but saves some cycles my $prev = "nonesuch"; my @all = grep($_ ne $prev && ($prev = $_), sort(@allpages)); ### ### check lists of outgoing links from %out2, add links to known nodes foreach my $out2 (keys %out2) { print "\nOutlinks: $out2\n" if ($debug); foreach my $out3 (@{ $out2{$out2} }) { print "\tChecking: $out3\n"; if (grep($out3 eq $_, @all)) { $g->add_edge($out2 => $out3) unless ($out2 eq $out3); print "\t\tAdding edge $out2 -> $out3\n" if ($debug); } } } } ### ### Dump the PNG if not debugging if (! $debug) { print "Content-type: image/png\n\n"; $g->as_png(\*STDOUT); } ### ### function links: ### takes: page to spider for links ### returns: list of links for that page ### ### screens out Category* and RecentChanges to keep graphs tidy sub links { my @pages; my $target = shift; my $text = get($target); my $p = HTML::TokeParser->new(\$text); while (my $tag = $p->get_tag("a")) { if ($tag->[1]{href} =~ /$base([-\w]+)$/) { my $href = $1; if ($href ne $target && $href !~ /^Category/ && $href ne "RecentChanges") { push @pages, $href; } } } return @pages; } ### ### function sanitize: ### takes: string ### returns: string that GraphViz accepts as a node name ### ### PmWiki allows page names that don't work for GraphViz node names sub sanitize { my $sane = shift; $sane =~ s/-(.)/\u$1/g; return $sane; } ### ### function show_error: ### takes: string ### returns: nothing ### ### Spits out an HTML error page of low quality sub show_error { my $error = shift; print "Content-type: text/html\n\n"; print "Error\n"; print "", $error, "\n"; print "\n"; } ### Copyright (c) 2003, Bryant Durrell ### All rights reserved. ### ### Redistribution and use in source and binary forms, with or ### without modification, are permitted provided that the ### following conditions are met: ### ### Redistributions of source code must retain the above ### copyright notice, this list of conditions and the following ### disclaimer. Redistributions in binary form must reproduce ### the above copyright notice, this list of conditions and the ### following disclaimer in the documentation and/or other ### materials provided with the distribution. Neither the name ### of Innocence Consulting nor the names of its contributors may ### be used to endorse or promote products derived from this ### software without specific prior written permission. THIS ### SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND ### CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, ### INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ### MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ### DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR ### CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ### SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ### NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ### LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ### HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ### CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR ### OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ### SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.