#!/usr/local/bin/perl -w # This is the script for second screen of the web-interface. # initialize the error message string my $err_msg = ""; my $path = ""; my $perl5lib = ""; #open the configuration file (./config.txt) and read the environment variables: PATH and PERL5LIB open(CONF,") { # read the PATH settings if($config=~/^PATH=(.+)/) { $path = $1; chomp $path; $path =~ s/^\s//; } # read the PERL5LIB settings if($config=~/^PERL5LIB=(.+)/) { $perl5lib = $1; chomp $perl5lib; $perl5lib =~ s/^\s//; } } if($path eq "") { $err_msg .= "Error!! PATH value not specified - please initialize the SC-cgi/config.txt file
"; } if($perl5lib eq "") { $err_msg .= "Error!! PERL5LIB value not specified - please initialize the SC-cgi/config.txt file
"; } # set the ENV variables $ENV{'PATH'}=$path; $ENV{'PERL5LIB'}=$perl5lib; # -------------------------------------------------------------------- use CGI; $CGI::DISABLE_UPLOADS = 0; # Create the URL for the form-action my $host = $ENV{'HTTP_HOST'}; $q=new CGI; print $q->header; print $q->start_html("SenseClusters"); # check if error occurred during reading the PATH and PERL5LIB variables if($err_msg ne "") { error($q, $err_msg); } $clustype=$q->param("clustype"); $prefix=$q->param("prefix"); if(!$prefix) { $prefix="user"; } if($prefix =~ m/^([\w\d_\-]+)$/) { $prefix=$1; } else { error($q,"Invalid Prefix value!"); } $usr_dir="user_data/". $prefix.time(); $status=system("mkdir $usr_dir"); if($status!=0) { error($q,"Can not create the user directory $usr_dir"); } ######################## # Test and Train Scope # ######################## $scope_test=$q->param("scope_test"); if($scope_test) { if($scope_test =~ m/^(\d+)$/) { $scope_test=$1; } else { error($q,"Invalid TEST Scope value!"); } } $scope_train=$q->param("scope_train"); if($scope_train) { if($scope_train =~ m/^(\d+)$/) { $scope_train=$1; } else { error($q,"Invalid TRAIN Scope value!"); } } #format $precision = $q->param("precision"); $format='f16.' . $precision; ################ # Feature Type # ################ $feature_type=$q->param("feature"); ################ # Split Type # ################ $split=$q->param("split"); if($split) { if($split =~ m/^(\d+)$/) { if($split >= 1 && $split <= 99) { $split=$1; } else { error($q,"The Split value can be between 1 to 99 (inclusive)."); } } else { error($q,"Invalid Split value!"); } } #################### # loading Testfile # #################### $testfile=$q->param("testfile"); if(!$testfile) { print "Please specify the Testfile.
\n"; exit; } $test="$usr_dir/$prefix-test.xml"; open(TEST,">$test") || error($q,"Error in uploading Testfile."); while(read($testfile,$buffer,1024)) { print TEST $buffer; } close TEST; ##################### # loading Trainfile # ##################### $trainfile=$q->param("trainfile"); if($trainfile) { $train="$usr_dir/$prefix-train.plain"; open(TRAIN,">$train") || error($q,"Error in uploading Trainfile."); seek($trainfile,0,0); while(read($trainfile,$buffer,1024)) { print TRAIN $buffer; } } close TRAIN; # Check if both TRAIN and split option specified! if($trainfile && $split) { error($q,"Split and TRAIN file - both options cannot be used together."); } ##################### # Loading Tokenfile # ##################### $token="$usr_dir/token.regex"; open(TOKOUT,">$token") || error($q,"Error in loading Tokenfile."); if($q->param("token")) { $tokenfile=$q->param("token"); while(read($tokenfile,$buffer,128)) { print TOKOUT $buffer; } } else { open(TOKIN,"token.regex") || error($q,"Error in opening default token.regex file."); while() { print TOKOUT; } close TOKIN; } close TOKOUT; ###################### # Loading Targetfile # ###################### if($clustype eq "hclust" || $clustype eq "lsa-hclust") { if($q->param("target")) { $target="$usr_dir/target.regex"; open(TARGET_OUT,">$target") || error($q,"Error in loading Targetfile."); $targetfile=$q->param("target"); while(read($targetfile,$buffer,128)) { print TARGET_OUT $buffer; } close TARGET_OUT; } else { $target="$usr_dir/target.regex"; open(TARGET_OUT,">$target") || error($q,"Error in loading Targetfile."); open(TARGET_IN,"target.regex") || error($q,"Error in opening default target.regex file."); while() { print TARGET_OUT; } close TARGET_IN; close TARGET_OUT; } } #else #{ # open(TARGET_IN,"target.regex") || error($q,"Error in opening default target.regex file."); # while() # { # print TARGET_OUT; # } # close TARGET_IN; #} ######################### # Writing to Param file # ######################### $param_file="$usr_dir/param_file"; open(PARAM,">$param_file") || error($q,"Error in opening PARAM file."); # word clustering / lsa options if($clustype eq "wclust") { print PARAM "WORDCLUST=ON\n"; } elsif($clustype eq "lsa-fclust") { print PARAM "LSAFEATCLUST=ON\n"; } elsif($clustype eq "lsa-hclust" || $clustype eq "lsa-hlclust") { print PARAM "LSACONTEXTCLUST=ON\n"; } print PARAM "TEST=\"$prefix-test.xml\"\n"; if(defined $train) { print PARAM "TRAIN=\"$prefix-train.plain\"\n"; } print PARAM "TOKEN=\"token.regex\"\n"; if(defined $target) { print PARAM "TARGET=\"target.regex\"\n"; } if($prefix) { print PARAM "PREFIX=\"$prefix\"\n"; } print PARAM "FEATURE=$feature_type\n"; print PARAM "FORMAT=$format\n"; if($scope_test) { print PARAM "SCOPE_TEST=$scope_test\n"; } if($scope_train) { print PARAM "SCOPE_TRAIN=$scope_train\n"; } if($split) { print PARAM "SPLIT=$split\n"; } close PARAM; ######################## # Feature Options Form # ######################## print $q->start_form(-action=>'third.cgi', -method=>'post', -enctype=>'multipart/form-data'); print "

SenseClusters Web Interface

Clusters text instances based on their contextual similarity ...

"; print $q->h3("Step 2:"); print " "; } elsif($clustype eq "lsa-fclust") { print $q->a({-href=>"http://$host/SC-htdocs/help.html#context_ord"},"Context Vector"), " Type "; } elsif($feature_type eq "uni" && ($clustype eq "hclust" || $clustype eq "hlclust")) { print $q->a({-href=>"http://$host/SC-htdocs/help.html#context_ord"},"Context Vector"), " Type "; } elsif($clustype eq "lsa-hclust" || $clustype eq "lsa-hlclust") { print $q->a({-href=>"http://$host/SC-htdocs/help.html#context_ord"},"Context Vector"), " Type "; } else { print $q->a({-href=>"http://$host/SC-htdocs/help.html#context_ord"},"Context Vector"), " Type "; } print ""; print ""; print ""; print ""; print ""; if($feature_type ne "uni") { print ""; print ""; print ""; print "
"; if($clustype eq "wclust") { print $q->a({-href=>"http://$host/SC-htdocs/help.html#context_ord"},"Context Vector"), " Type ", $q->popup_menu(-name=>'context', -values=>['o2'], -labels=>{o2=>'2nd Order'}, -default=>o2), "
", $q->popup_menu(-name=>'context', -values=>['o1'], -labels=>{o1=>'1st Order'}, -default=>o1), "
", $q->popup_menu(-name=>'context', -values=>['o1'], -labels=>{o1=>'1st Order'}, -default=>o1), "
", $q->popup_menu(-name=>'context', -values=>['o2'], -labels=>{o2=>'2nd Order'}, -default=>o2), "
", $q->popup_menu(-name=>'context', -values=>['o1', 'o2'], -labels=>{o1=>'1st Order', o2=>'2nd Order'}, -default=>o2), "
", $q->a({-href=>"http://$host/SC-htdocs/help.html#space_space"}, "Clustering Space"), " ", $q->popup_menu(-name=>'space', -values=>['vector', 'similarity'], -default=>'vector'), "
", $q->a({-href=>"http://$host/SC-htdocs/help.html#cluststop_cs"}, "Cluster Stopping"), " "; print " Set manually"; print " Use cluster stopping measures"; print "
Use ", $q->a({-href=>"http://$host/SC-htdocs/help.html#binary"}, "Binary Vectors"), "", $q->checkbox(-name=>'binary',-label=>''), "

Load the ", $q->a({-href=>"http://$host/SC-htdocs/help.html#stop_stopfile"},"STOP file"), " (Perl Regex Format) ", $q->filefield(-name=>'stop', -size=>30),""; print $q->checkbox(-name=>'default_stop',-label=>''), " Use ", $q->a({-href=>"http://$host/SC-htdocs/stopfile"}, "Default"),"
Lower ", $q->a({-href=>"http://$host/SC-htdocs/help.html#remove_f"},"Frequency Cutoff"), " (Integer) ", $q->textfield(-name=>'remove', -size=>5,-value=>5,-maxlength=>7), " [Use 0 to disable this option]
", $q->a({-href=>"http://$host/SC-htdocs/help.html#window_w"},"Window"), " (Integer) " , $q->textfield(-name=>'window', -size=>5, -value=>2, -maxlength=>7), " [Use 0 to disable this option]
", $q->a({-href=>"http://$host/SC-htdocs/help.html#stat_stat"},"Statistical Test"), " of Association ", $q->popup_menu(-name=>'stat', -labels=>{ll=>"Log-Likelihood", x2=>"Chi-Square", dice=>"Dice", phi=>"Phi", odds=>"Odds Ratio", pmi=>"Point-wise Mutual Information", tmi=>"True Mutual Information", tscore=>"T-Score", leftFisher=>"Left Fishers", rightFisher=>"Right Fishers", none=>"None"}, -values=>['ll', 'x2', 'dice', 'phi', 'odds', 'pmi', 'tmi', 'tscore', 'leftFisher', 'rightFisher', 'none'], -default=>'none'), "
", $q->a({-href=>"http://$host/SC-htdocs/help.html#stat_rank_n"},"Statistical Rank"), " Cutoff (Integer) ", $q->textfield(-name=>'stat_rank', size=>5, -maxlength=>7), "
", $q->a({-href=>"http://$host/SC-htdocs/help.html#stat_score_s"},"Statistical Score"), " Cutoff (Real Number) ", $q->textfield(-name=>'stat_score', size=>10, -maxlength=>7), "
"; } print "
"; print $q->hidden(-name=>'usr_dir', -value=>$usr_dir); print $q->hidden(-name=>'prefix', -value=>$prefix); print $q->hidden(-name=>'clustype', -value=>$clustype); print $q->end_form; print $q->p; print $q->a({-href=>"http://$host/SC-htdocs/help.html"},"Help"); print "
"; print $q->end_html; sub error { my ($q,$reason) = @_; print $q->h1("Error"), $q->p($q->i($reason)), $q->end_html; exit; }