Algorithms: Find Valid n-Characters-Word from m Characters Iteratively


I was asked to help solve the following word puzzle: find valid n-characters-word from m characters.

In the following example, The answer is combine.
I am not good at this kind of game. But I am a programmer, so I can write a program to do it.

I use open source English dictionary libraries:
WordNet http://wordnet.princeton.edu/wordnet/download/.
JWI: http://projects.csail.mit.edu/jwi/.
JWI is a Java library for interfacing with Wordnet.

First I will load all English words into memory, later, when we call dict.getIndexWord, it will directly search the word HashMap in the memory.
private void init(String dictFile) throws IOException, InterruptedException {
  dict = new RAMDictionary(new File(dictFile), ILoadPolicy.NO_LOAD);
  dict.open();
  dict.load(true);
 } 
In previous post, I explained how to solve it with recursion, and the disadvantage of it: requires a lot of memory.

In this article, I solve this problem iteratively.
The main difference is that:
1. Get all combinations of n-characters-word from m characters: order is not important.
2. For every n-characters-word returned in step 1, using counting quickPerm algorithm to iterate each word in its permutations, if it is a valid word, save it to a list.

The code is like below:
public List getAllValidWords(String str, int len) {
  List result = new ArrayList<>();
  List nStrs = getNChars(str, len);

  for (String aStr : nStrs) {
   result.addAll(getValidWords(aStr));
  }

  return result;
 }

Another similar word game is to find the longest words from m characters.
To solve this, I call getAllValidWords(str, n), getAllValidWords(str, n-1) etc in multiple threads.

Implementation
The code is like below: you can review the complete code at Github.
The output is like below:
getAllValidWords took 755 mill seconds, found [combine, newcomb]
getMaxLenValidWord took 17601, len: 8 found [combine, newcomb]

package org.codeexample.algorithms.collected.wordpuzzle;
import edu.mit.jwi.RAMDictionary;
import edu.mit.jwi.data.ILoadPolicy;
import edu.mit.jwi.item.IIndexWord;
import edu.mit.jwi.item.POS;

public class WordPuzzleIteratively {
 private RAMDictionary dict;
 public WordPuzzleIteratively(String dictFile) throws IOException,
   InterruptedException {
  init(dictFile);
 }
 private void init(String dictFile) throws IOException, InterruptedException {
  dict = new RAMDictionary(new File(dictFile), ILoadPolicy.NO_LOAD);
  dict.open();
  dict.load(true);
 }
 public void close() {
  dict.close();
 }

 public List<String> getAllValidWords(String str, int len) {
  long start = new Date().getTime();
  List<String> result = new ArrayList<>();
  List<String> nStrs = getNChars(str, len);

  for (String aStr : nStrs) {
   result.addAll(getValidWords(aStr));
  }
  System.out.println("getAllValidWords took "
    + (new Date().getTime() - start) + " mill seconds, found " + result);
  return result;
 }

 public List<String> getMaxLenValidWordThreaded(final String str)
   throws MalformedURLException, IOException, InterruptedException {
  final ArrayList<ArrayList<String>> list = new ArrayList<ArrayList<String>>();
  if (str == null || str.length() == 0)
   return new ArrayList<>();
  long start = new Date().getTime();

  for (int len = str.length(); len > 0; len--) {
   list.add(new ArrayList<String>());
  }
  ExecutorService executor = Executors.newFixedThreadPool(str.length());
  for (int len = str.length(); len > 0; len--) {
   final int length = len;
   Runnable task = new Runnable() {
    @Override
    public void run() {

     long innerStart = new Date().getTime();
     ArrayList<String> result = (ArrayList<String>) getAllValidWords(str,
       length);
     System.out.println("getMaxLenValidWordThreaded len: " + length
       + " took " + (new Date().getTime() - innerStart) + ", found "
       + result.size() + " : " + result);

     list.set(length, result);
    }
   };
   executor.submit(task);
  }

  executor.shutdown();
  boolean terminated = executor.awaitTermination(Long.MAX_VALUE,
    TimeUnit.SECONDS);
  if (!terminated) {
   throw new RuntimeException("Request takes too much time");
  }

  List<String> result = new ArrayList<>();

  int index = str.length() - 1;
  for (; index >= 0; index--) {
   result = list.get(index);
   if (!result.isEmpty())
    break;
  }
  System.out.println("getMaxLenValidWord took "
    + (new Date().getTime() - start) + ", len: " + (index + 1) + " found "
    + result);
  return result;
 }

 public List<String> getMaxLenValidWord(String str)
   throws MalformedURLException, IOException {
  long start = new Date().getTime();
  List<String> list = new ArrayList<>();
  for (int len = str.length(); len > 0; len--) {
   long innerStart = new Date().getTime();
   list = getAllValidWords(str, len);
   System.out.println("getMaxLenValidWord len: " + len + " took "
     + (new Date().getTime() - innerStart));
   if (!list.isEmpty()) {
    break;
   }
  }
  System.out.println("getMaxLenValidWord took "
    + (new Date().getTime() - start));
  return list;
 }

 /**
  * Idea comes from book: Cracking code interview.
  */
 private List<String> getNChars(String strs, int len) {
  // for each char, it can be in the response or not, if
  List<String> nStrs = new ArrayList<String>();
  long max = 1 << strs.length();
  for (int i = 0; i < max; i++) {
   StringBuilder sb = new StringBuilder(len);
   int k = i;
   int index = 0;
   while (k > 0) {
    if ((k & 1) > 0) {
     sb.append(strs.charAt(index));
    }
    k >>= 1;
    index++;
   }

   if (sb.length() == len) {
    nStrs.add(sb.toString());
   }
  }
  return nStrs;
 }

 /**
  * Ideas comes from: The Counting QuickPerm Algorithm:
  * http://www.freewebs.com/permute/quickperm.html<br>
  * http://stackoverflow.com/questions/11915026/permutations-of-a-string-using-
  * iteration
  */
 private List<String> getValidWords(String str) {
  List<String> validWords = new ArrayList<>();
  if (isValidWorld(str)) {
   validWords.add(str);
  }
  char[] a = str.toCharArray();
  int n = a.length;
  int[] p = new int[n]; // Index control array initially all zeros
  int i = 1;
  while (i < n) {
   if (p[i] < i) {
    int j = ((i % 2) == 0) ? 0 : p[i];
    swap(a, i, j);
    String word = toString(a);
    if (isValidWorld(word)) {
     validWords.add(word);
    }
    p[i]++;
    i = 1;
   } else {
    p[i] = 0;
    i++;
   }
  }
  return validWords;
 }

 private static String toString(char[] a) {
  StringBuilder builder = new StringBuilder();
  builder.append(a);
  return builder.toString();
 }

 private static void swap(char[] a, int i, int j) {
  char temp = a[i];
  a[i] = a[j];
  a[j] = temp;
 }

 public static void main(String[] args) throws Exception {
  String str = "oivmwujbecn";
  int len = 7;
  WordPuzzleIteratively instance = new WordPuzzleIteratively(
    "C:/Program Files (x86)/WordNet/2.1/dict");

  List<String> list = null;
  // getAllValidWords took 755 mill seconds, found [combine, newcomb]
  list = instance.getAllValidWords(str, len);
  System.out.println(list.size());
  System.out.println(list);

  // getMaxLenValidWord took 17601, len: 8 found [combine, newcomb]
  list = instance.getMaxLenValidWordThreaded(str);
  System.out.println(list.size());
  System.out.println(list);
  
  instance.close();
 }

 private boolean isValidWorld(String word) {
  Boolean isWord = false;
  IIndexWord idxWord = dict.getIndexWord(word, POS.NOUN);
  if (idxWord != null && !idxWord.getWordIDs().isEmpty()) {
   isWord = true;
  }
  return isWord;
 }
}

Labels

adsense (5) Algorithm (69) Algorithm Series (35) Android (7) ANT (6) bat (8) Big Data (7) Blogger (14) Bugs (6) Cache (5) Chrome (19) Code Example (29) Code Quality (7) Coding Skills (5) Database (7) Debug (16) Design (5) Dev Tips (63) Eclipse (32) Git (5) Google (33) Guava (7) How to (9) Http Client (8) IDE (7) Interview (88) J2EE (13) J2SE (49) Java (186) JavaScript (27) JSON (7) Learning code (9) Lesson Learned (6) Linux (26) Lucene-Solr (112) Mac (10) Maven (8) Network (9) Nutch2 (18) Performance (9) PowerShell (11) Problem Solving (11) Programmer Skills (6) regex (5) Scala (6) Security (9) Soft Skills (38) Spring (22) System Design (11) Testing (7) Text Mining (14) Tips (17) Tools (24) Troubleshooting (29) UIMA (9) Web Development (19) Windows (21) xml (5)