import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.sql.ResultSet; import java.sql.SQLException; public class Compare { protected QueryRunner queryRunner; public Compare() { queryRunner = new DefaultQueryRunner(); try { queryRunner.openConnection(); } catch (SQLException sqle) { throw new RuntimeException(sqle); } Word w; try { while ((w = getWord("Tolkien")) != null) { System.out.println("Word is "+w); Collection collection = getCandidateStrings(w.word); findBestMatch(w, collection); } } catch (SQLException sqle) { sqle.printStackTrace(System.out); } finally { try { queryRunner.closeConnection(); } catch (Exception e) { e.printStackTrace(System.out); } } } /** * Discovers which word string in the database is the best match for the given 'baseWord' and stores * the best match and its similarity */ protected Word findBestMatch(Word baseWord, Collection collection) { double bestScore = -1; Word bestMatch = null; Iterator iter = collection.iterator(); while (iter.hasNext()) { Word w = (Word) iter.next(); double similarity = WhiteSimilarity.compareStrings(baseWord.word, w.word); if (similarity > bestScore) { bestScore = similarity; bestMatch = w; } } //System.out.println("Best Match for "+baseWord+" is "+bestMatch+", similarity = "+bestScore); storeBestMatch(baseWord, bestMatch, bestScore); return bestMatch; } protected void storeBestMatch(Word w, Word bestMatch, double bestScore) { try { String sql = "insert into matches set word='"+w.word+"', word_id="+w.id+",best='"+bestMatch.word+"', lang='"+bestMatch.lang+"', similarity="+bestScore+";"; System.out.println(sql); int rows = queryRunner.runUpdate(sql); } catch (SQLException sqle) { sqle.printStackTrace(); } } /** * Find a Tolkien word that has not been considered yet * @return null if there are no more words to consider */ protected Word getWord(String lang) throws SQLException { String query = "select w.word, w.word_id FROM words w LEFT JOIN matches m USING (word_id) WHERE w.lang='"+lang+"' and ISNULL(m.word_id) limit 1;"; ResultSet rs = queryRunner.runQuery(query); if (rs.next()) { String word = rs.getString(1); int id = rs.getInt(2); return new Word(word,id,lang); } else { return null; } } /** * Assumes input parameter is at least two characters long * @return a Collection of Word objects */ protected Collection getCandidateStrings(String str) throws SQLException { String firstChars = str.substring(0,2); String query = "select word, word_id, lang from words where word like '"+firstChars+"%' and lang <> 'tolkien';"; System.out.println("Query is "+query); ResultSet rs = queryRunner.runQuery(query); ArrayList results = new ArrayList(); int rowCount = 0; while (rs.next()) { String word = rs.getString(1); int id = rs.getInt(2); String lang = rs.getString(3); Word w = new Word(word, id, lang); results.add(w); rowCount++; } System.out.println("Returned "+rowCount+" rows"); return results; } class Word { private String word; private int id; private String lang; public Word(String wd, int wdId, String language) { word = wd; id = wdId; lang = language; } public String toString() { StringBuffer buf = new StringBuffer("Word["); buf.append(word); buf.append(","); buf.append(id); buf.append(","); buf.append(lang); buf.append("]"); return buf.toString(); } } public static void main(String[] args) { new Compare(); } }