#!/usr/bin/perl

# rivers.cgi - using Protovis, display co-occurrences in the form of a network diagram

# Eric Lease Morgan <eric_morgan@infomotions.com>
# December 24, 2010 - first investigations as a CGI script
# December 26, 2010 - added ability to configure radius and threshold
# December 28, 2010 - tweaked interface
# January   2, 2011 - added sliders
# January   9, 2011 - tweaked visualization; added matrix

# configure: corpus file locations, plus the values used when the matching
# CGI parameter (q, r, t) is not supplied
use constant {
	EMERSON     => '../corpus/emerson-representative-755.txt',
	MACHIAVELLI => '../corpus/machiavelli-prince-680.txt',
	QUERY       => 'men',
	RADIUS      => 50,
	THRESHOLD   => 7,
};

# require
use CGI;
use Lingua::Concordance;
use Lingua::StopWords qw( getStopWords );
use strict;
require '../lib/breath.pl';

# initialize
use warnings;    # lexically scoped; applies from here to the end of this file

my $cgi       = CGI->new;
my $stopwords = getStopWords( 'en' );
$$stopwords{ 'one' }++;
my %matrix    = ();
my $html      = '';

# the query is free text (the usage tips advertise regular expressions), so it
# is accepted as-is here and escaped when it is written into the page
my $query = $cgi->param( 'q' );
$query = QUERY unless $query;

# radius and threshold are written into JavaScript as well as HTML, so only
# plain positive integers are accepted; anything else falls back to the default
my $radius = $cgi->param( 'r' );
$radius = ( defined $radius && $radius =~ /\A[0-9]+\z/ && $radius > 0 ) ? $radius + 0 : RADIUS;
my $threshold = $cgi->param( 't' );
$threshold = ( defined $threshold && $threshold =~ /\A[0-9]+\z/ && $threshold > 0 ) ? $threshold + 0 : THRESHOLD;

# denote the work to evaluate; only the works listed here can be requested
my %file_for = ( prince => MACHIAVELLI, representative => EMERSON );
my $work     = $cgi->param( 'w' );
$work = '' unless defined $work;
unless ( exists $file_for{ $work } ) {

	# an unknown work used to fall through with an empty file name
	print $cgi->header( -status => '400 Bad Request', -type => 'text/plain' );
	print 'Unknown work; expected one of: ', join( ', ', sort keys %file_for ), "\n";
	exit;

}
my $file   = $file_for{ $work };
my $corpus = slurp( $file );

# get initial words found near the query and sort them by frequency
my $words = concordance( $corpus, $query, $radius, $stopwords );
my @keys  = _by_frequency( $words );

# process each of the most frequent words, up to the threshold, but never
# more words than were actually found
my $limit = $threshold < @keys ? $threshold : scalar @keys;
foreach my $term ( @keys[ 0 .. $limit - 1 ] ) {

	my $nearby  = concordance( $corpus, $term, $radius, $stopwords );
	my @subkeys = _by_frequency( $nearby );
	next unless @subkeys;

	my $coocurrances = coocurances( $subkeys[ 0 ], $nearby, $threshold );
	my @list         = _by_frequency( $coocurrances );
	next unless @list;

	# each row is keyed by its own most frequent word
	$matrix{ $list[ 0 ] } = [ @list ];

}

# render the matrix as an HTML table, one row per key
my $matrix = '<table cellpadding="7">';
foreach my $key ( sort keys %matrix ) {

	$matrix .= '<tr>';
	$matrix .= join '', map { '<td>' . $cgi->escapeHTML( $_ ) . '</td>' } @{ $matrix{ $key } };
	$matrix .= '</tr>';

}
$matrix .= '</table>';

# create an ordered list of the found words; each distinct word gets the next
# free index, which becomes its node number in the diagram
my %words      = ();
my $next_index = 0;
foreach my $key ( sort keys %matrix ) {

	foreach my $word ( @{ $matrix{ $key } } ) {

		next if exists $words{ $word };
		$words{ $word } = $next_index++;

	}

}

# build a list of nodes for Protovis, in index order
my $nodes = join ',',
	map { '{nodeName:' . _js_string( $_ ) . '}' }
	sort { $words{ $a } <=> $words{ $b } } keys %words;

# build a list of links for Protovis: the first word of every row is linked to
# the words that follow it, at most threshold - 1 of them
my @links = ();
foreach my $key ( sort keys %matrix ) {

	my $list = $matrix{ $key };
	my $last = $threshold - 1;
	$last = $#$list if $#$list < $last;
	foreach my $position ( 1 .. $last ) {

		push @links, qq({source:$words{ $$list[ 0 ] },target:$words{ $$list[ $position ] }});

	}

}
my $links = join ',', @links;

# build the html; substitutions stay in their original order because the later
# ones are global and therefore also apply to the text inserted by earlier ones
my $javascript = same_breath();
my $data       = qq(<script type="text/javascript">var corpus = {nodes:[$nodes],\nlinks:[$links]};</script>\n);
my $safe_query = $cgi->escapeHTML( $query );    # NOTE(review): HTML escaping only; confirm same_breath() never puts ##QUERY## inside script code
my $safe_work  = $cgi->escapeHTML( $work );
$html          =  template();
$html          =~ s/##JAVASCRIPT##/$javascript/;
$html          =~ s/##MATRIX##/$matrix/;
$html          =~ s/##DATA##/$data/;
$html          =~ s/##TITLEQUERY##/ :: $safe_query/g;
$html          =~ s/##QUERY##/$safe_query/g;
$html          =~ s/##BREATH##/$radius/g;
$html          =~ s/##DETAIL##/$threshold/g;
$html          =~ s/##WORK##/$safe_work/g;

# done
print $cgi->header;
print $html;
exit;


# Given a reference to a hash of word => count, return the words ordered from
# the highest count to the lowest. Equal counts are ordered alphabetically so
# that the result is the same from one run to the next.
sub _by_frequency {

	my ( $count_of ) = @_;
	return sort { $$count_of{ $b } <=> $$count_of{ $a } or $a cmp $b } keys %$count_of;

}

# Return the given text as a double-quoted JavaScript string literal. Quotes,
# backslashes, line breaks and the characters that could end the surrounding
# script element are written as \uXXXX escapes; everything else is untouched.
sub _js_string {

	my ( $text ) = @_;
	$text =~ s/(["\\<>&\r\n])/sprintf '\\u%04x', ord $1/ge;
	return qq("$text");

}




# Return the HTML skeleton of the page as a single string. Takes no arguments.
# The ##JAVASCRIPT##, ##DATA##, ##MATRIX##, ##QUERY##, ##BREATH##, ##DETAIL##
# and ##WORK## markers are left in place for the caller to substitute.
sub template {

	# the heredoc is not interpolated, so the jQuery "$" can be written as-is
	return <<'EOT';
<html>
<head>
	<script type="text/javascript" src="../lib/protovis.js"></script>
	##DATA##
	<link rel="stylesheet" type="text/css" href="http://infomotions.com/etc/css/jquery-ui.css" />
	<script type="text/javascript" src="http://infomotions.com/etc/js/jquery-core.js"></script>
	<script type="text/javascript" src="http://infomotions.com/etc/js/jquery-addons.js"></script>
	<script type="text/javascript">
		
		// show the size dialog
		function tips() {
			$("#tips").dialog(
				{
					disabled: false,
					height: 375,
					modal: true,
					resizable: true,
					title: 'Usage tips',
					width: 600
				}
			);
		};
		
		function matrix() {
			$("#matrix").dialog(
				{
					disabled: false,
					height: 375,
					modal: true,
					resizable: true,
					title: 'Matrix',
					width: 700
				}
			);
		};
		
	</script>
	<title>Network diagram</title>
	<style>
		li { margin-bottom: 1em }
		#form { margin-left: 2em }
		#content { margin-right: 1%; font-size: small }
		#tips { display: none; font-size: small }
		#matrix { display: none; font-size: small }
	</style>
</head>
<body style='margin: 3%'>

##JAVASCRIPT##

<div id='content'>
	<h1>Network diagram</h1>
	
	<p>Play with this page to literally see what the author said <em>in the same breath</em> when he used a given word.</p>
	
	<div id='form' >

		<script>
			$(function() {
				$( "#breath_slider" ).slider({
					value: ##BREATH##, min: 40, max: 80, step: 1,
					slide: function( event, ui ) { $( "#breath" ).val( ui.value ); }
				});
				
				$( "#detail_slider" ).slider({
					value: ##DETAIL##, min: 2, max: 8, step: 1,
					slide: function( event, ui ) { $( "#detail" ).val( ui.value ); }
				});
				
				$( "#breath" ).val( $( "#breath_slider" ).slider( "value" ) );
				$( "#detail" ).val( $( "#detail_slider" ).slider( "value" ) );
				
			});
		</script>

		<form action="./network.cgi" method="get">
			<label>Enter a word: </label>
			<input name="q" type="text" value="##QUERY##"><br />
			<input name="w" type="hidden" value="##WORK##"><br />
			<label>Size of breath (40-80 characters):</label>
			<input id="breath" name="r" type="text" value="##BREATH##" style='border: 0'><br />
			<div id="breath_slider" style='width: 25%; margin-bottom: 1em'></div>
			<label>Amount of detail (2-8 words):</label> <input id="detail" name="t" type="text" value="##DETAIL##" style='border: 0' >
			<div id="detail_slider" style='width: 25%'></div>
			<button type="submit">Go</button><br />
			<a href="javascript:tips()">Usage tips</a>
		</form>
	</div>

	<!-- tips dialog box -->
	<div id="tips">
		<p>How to get the most out of this application:</p>
		<ol><li>Enter or a word (or "regular expression") to locate in the book. This is your query.</li>
		<li>Change the size of the <strong>breath</strong> to increase or decrease the number of characters on either side of the query where co-occurances will be found. Values between 40 and 80 work well.</li>
		<li>Change the amount of <strong>detail</strong> to increase or decrease the number of co-occurances to identify for each query. Values between 4 and 7 work well.</li>
		<li>Adjust the breath and detail until the resulting diagram forms simple patterns with very few crossing lines.</li>
		</ol>
		<p>The resulting graphic will tell you something about the text, and you will be doing "distant reading".</p>
	</div>

	<!-- matrix -->
	<div id="matrix">
	<p>The visualization is based on the following matrix of terms, starting with and building upon  "##QUERY##":</p>
	##MATRIX##
	</div>

	<p>Zoom in and out to see detail. Drag nodes to simplify the diagram. Note enclosed polygons to "read" coherent thoughts. (<a href="javascript:matrix()">Matrix</a>)</p>


	<p>For more information, see the <a href="http://infomotions.com/blog/2011/01/visualizing-co-occurrences-with-protovis/" target="_blank">blog posting</a>.</p>
				
</div>
</body>
</html>

EOT

}


