use strict; use warnings; use DBI; use FindBin (); my $DB_TABLE = "drow_dictionary"; my $LANG0 = "Drow"; my $LANG1 = "Common"; my $NOTES = "Notes"; require "drow_pwx.pl"; # Contains database credentials loadDictionary( "words.txt" ); sub loadDictionary { my( $DICTFILE ) = $FindBin::RealBin . $_[0]; my @lines; # Read in the dictionary into memory print "Reading file '$DICTFILE'..."; open( DICTFILE,'<', $DICTFILE ) || die( "Couldn't open file: $!" );; my( @filelines ) = ; close( DICTFILE ); foreach $line (@filelines) { push( @lines, $line ) if( $line =~ m/\w/ ); } @filelines = (); print (0+@lines)." lines read.\n"; print "Sorting..."; @lines = sort( @lines ); print "Done.\n"; # Setup Database Connection my $dbh = DBI->connect("DBI:mysql:$DATABASE:localhost", $DB_USERNAME, $DB_PASSWORD ) || die( "Could not establish database connection: ".$DBI::errstr ); $dbh->do( "delete from $DB_TABLE" ); # Cycle through lines, adding them into database my( $query ) ="insert into $DB_TABLE ($LANG0, $LANG1, $NOTES) values (?,?,?) "; my $sth = $dbh->prepare($query); foreach $line (@lines ) { my( $notes ) = ""; # Simple word0=word1 line $line =~ s/\`/\'/g; # Change back-quotes to normal quotes $line =~ s/\_/\ /g; # Change underscores to spaces $line =~ s/\+/\ /g; # Change plusses to spaces if( $line =~ m/^([\w\-\'\s\,\+]*)\=(.*)/ ) { $word1 = $1; $word2 = $2; # Get rid of email address of contributed words if( $word2 =~ m/\=/ ) { $word2 = $`; } # Extract notes if any if( $word2 =~ m/\((.*)\)/) { $notes = $1; $notes =~ s/\'/\\\'/g; $word2 = $`; } # Split if it has commas my( @words1, @words2 ); if( $word1 =~ m/\,/ ) { @words1 = split( ',', $word1 ); } else { @words1 = ($word1); } if( $word2 =~ m/\,/ ) { @words2 = split( ',', $word2 ); } else { @words2 = ($word2); } # Do all combinations if has commas foreach $word1 (@words1) { foreach $word2 (@words2) { $word1 = &trim( $word1 ); $word2 = &trim( $word2 ); if( $notes eq "" ) { $sth->execute($word1,$word2,undef); } else { $sth->execute($word1,$word2,$notes); } } } } else { warn( "Can't parse line ==$line==\n" ); } } } # Trims whitespace off start and end of string sub trim{ my( @asz, $sz ) = @_; foreach $sz (@asz){ $sz =~ s/^\s*(.*)$/$1/; ($sz = $`) if ($sz =~ m/\s*$/); } return wantarray() ? @asz : $asz[0]; }