Snake - Using Trie to Find Words Comprised of Provided Characters

At the Chinese New Year Celebration Party in our company, the host gives us a word puzzle:
Given 5 characters: S, N, A, K, E (as this year is the Year of the Snake), write down all words that are composed only of these 5 characters; each character can occur zero or more times.

This is a fun algorithm question, and it can be solved using a Trie as shown below.

We read words from a dictionary file and build a Trie. To get all words composed of the candidate characters, we traverse the Trie depth-first: for each candidate character present in the first layer, we descend into the second layer, try every candidate character again there, and so on.

When constructing this Trie:
If the Trie is going to be searched multiple times with different candidate characters, we can insert all words into it.
If we only need to answer this question once, we only insert the words that are composed solely of the candidate characters.

The code is shown below; you can review the complete code on GitHub.
Class Snake

package org.codeexample.jefferyyuan.algorithm.wordPuzzles;
import org.codeexample.jefferyyuan.common.WordTree;
/**
 * Word-puzzle solver built on {@link WordTree}: finds every word in the trie
 * that can be spelled using only a given set of characters, where each
 * character may be used any number of times.
 */
public class Snake extends WordTree {
 /**
  * Returns all words in the trie that consist solely of the given characters.
  * Each candidate character may appear zero or more times in a word.
  *
  * @param candidates the allowed characters; matching is case-insensitive.
  *        A {@code null} list yields an empty result and {@code null}
  *        elements are ignored.
  * @return a new, modifiable set of matching words (never {@code null})
  */
 public Set<String> getValidWords(List<Character> candidates) {
  Set<String> words = new HashSet<>();
  if (candidates == null) {
   return words;
  }
  // Lower-case the candidates (WordTree.addWord stores words in lower case)
  // and de-duplicate them so that e.g. 'S' and 's' do not make the search
  // walk the same subtree twice.
  Set<Character> allowed = new HashSet<>();
  for (Character candidate : candidates) {
   if (candidate != null) {
    allowed.add(Character.toLowerCase(candidate));
   }
  }
  collectWords(root, allowed, words);
  return words;
 }

 /**
  * Depth-first walk that only follows edges labelled with an allowed
  * character, adding every word node reached to {@code words}.
  * The starting node itself is not tested, only its descendants.
  *
  * @param node    node whose children are explored; must not be null
  * @param allowed lower-cased characters that may be followed
  * @param words   accumulator shared by the whole traversal, which avoids
  *                creating and merging a temporary set at every node
  */
 private void collectWords(WordNode node, Set<Character> allowed, Set<String> words) {
  Map<Character, WordNode> children = node.getChildrenMap();
  for (Character letter : allowed) {
   WordNode child = children.get(letter);
   if (child == null) {
    continue; // no word continues with this letter from here
   }
   if (child.isWord()) {
    words.add(child.getWord());
   }
   collectWords(child, allowed, words);
  }
 }

 /**
  * Creates a solver backed by the given dictionary file.
  *
  * @param dictFile path of the dictionary file passed to {@code init}
  * @throws IOException declared by {@code init(String)}
  * @throws InterruptedException declared by {@code init(String)}
  */
 public Snake(String dictFile) throws IOException, InterruptedException {
  init(dictFile);
 }
 // Insert each word that are comprised only of chars from the dictFile into the Trie
 // NOTE(review): the body is empty in this listing, so nothing is inserted here.
 private void init(String dictFile, List<String> chars) throws Exception {
 }
 // Insert each word from the dictFile into the Trie
 // NOTE(review): the body is empty in this listing, so the trie stays empty
 // unless words are added through addWord.
 private void init(String dictFile) throws IOException, InterruptedException {
 }
}

Class WordTree
package org.codeexample.jefferyyuan.common;
/**
 * A trie (prefix tree) of words. Words are normalized by
 * {@link #fixString(String)} before insertion, so the tree only ever contains
 * the lower-case letters 'a' to 'z'. Whether a node terminates a word is
 * recorded in the node's type (see {@link WordNode#isWord()}).
 */
public class WordTree {
 /** Root of the trie; it carries no character. */
 protected WordNode root;

 /** Creates an empty tree that contains only the root node. */
 public WordTree() {
  root = new WordNode(null, WordNode.TYPE_ROOT);
 }

 /**
  * Adds a word to the tree. The word is trimmed and normalized with
  * {@link #fixString(String)}; {@code null} and words that normalize to the
  * empty string are ignored.
  *
  * @param word the word to add, may be {@code null}
  */
 public void addWord(String word) {
  if (word == null) {
   return;
  }
  String normalized = fixString(word.trim());
  if (normalized.isEmpty()) {
   return;
  }
  WordNode current = root;
  for (int i = 0; i < normalized.length(); i++) {
   char character = normalized.charAt(i);
   WordNode next = current.getChildrenMap().get(character);
   if (next == null) {
    next = new WordNode(character, WordNode.TYPE_NON_WORD);
    current.addChild(next);
   }
   current = next;
  }
  // The node reached by the last character marks the end of the word.
  current.thisIsAWord();
 }

 /**
  * Normalizes a string for storage: converts it to lower case and keeps only
  * the characters 'a' to 'z'.
  * <p>
  * Adapted from
  * http://logos.cs.uic.edu/340/assignments/Solutions/Wordpopup/curso/trie.java
  *
  * @param str the string to normalize, must not be {@code null}
  * @return the lower-case letters of {@code str} in their original order;
  *         its length equals the number of letters kept
  * @throws NullPointerException if {@code str} is {@code null}
  */
 public String fixString(String str) {
  StringBuilder letters = new StringBuilder(str.length());
  for (int i = 0; i < str.length(); i++) {
   // Character.toLowerCase does not depend on the default locale.
   char c = Character.toLowerCase(str.charAt(i));
   if (c >= 'a' && c <= 'z') {
    letters.append(c);
   }
  }
  // Only the kept characters are returned; a fixed-size buffer would
  // leave trailing '\0' characters for every character that was dropped.
  return letters.toString();
 }

 /**
  * Returns all words that start with the given prefix. The prefix is
  * trimmed and matched case-insensitively, because stored words are lower
  * case.
  *
  * @param prefix the prefix to search for
  * @return all words in this tree that start with the prefix, in no
  *         particular order; <br>
  *         if prefix is null, return an empty list, if prefix is empty string,
  *         return all words in this word tree.
  */
 public List<String> wordsPrefixWith(String prefix) {
  List<String> none = new ArrayList<String>();
  if (prefix == null) {
   return none;
  }
  WordNode node = findNode(prefix.trim());
  return (node == null) ? none : node.subWords();
 }

 /**
  * Tests whether the tree contains a word. The word is trimmed and matched
  * case-insensitively, because stored words are lower case.
  *
  * @param word the word to look up
  * @return whether this tree contains this word, <br>
  *         if the word is null return false, if word is empty string, return
  *         true.
  */
 public boolean hasWord(String word) {
  if (word == null) {
   return false;
  }
  String trimmed = word.trim();
  if (trimmed.isEmpty()) {
   return true;
  }
  // Reaching a node is not enough: it must have been marked as a word end,
  // otherwise the text is only a prefix of longer words.
  WordNode node = findNode(trimmed);
  return node != null && node.isWord();
 }

 /**
  * Walks from the root along the lower-cased characters of {@code text}.
  *
  * @return the node reached, the root for empty text, or {@code null} if
  *         the tree has no such path
  */
 private WordNode findNode(String text) {
  WordNode current = root;
  for (int i = 0; i < text.length() && current != null; i++) {
   char character = Character.toLowerCase(text.charAt(i));
   current = current.getChildrenMap().get(character);
  }
  return current;
 }

 /**
  * A single node of the trie: one character, a link to its parent and a map
  * of children keyed by their character.
  */
 public static class WordNode {
  private Character character;
  private WordNode parent;
  private Map<Character, WordNode> childrenMap = new HashMap<Character, WordNode>();

  private int type;
  /** Type of the root node, which carries no character. */
  public static final int TYPE_ROOT = 0;
  /** Type of a node that is only part of longer words. */
  public static final int TYPE_NON_WORD = 1;
  /** Type of a node at which a word ends. */
  public static final int TYPE_WORD = 2;

  /**
   * @param character the character of this node, {@code null} for the root
   * @param type one of {@link #TYPE_ROOT}, {@link #TYPE_NON_WORD},
   *        {@link #TYPE_WORD}
   */
  public WordNode(Character character, int type) {
   this.character = character;
   this.type = type;
  }

  /**
   * @return all words that end at this node or anywhere below it, as
   *         complete words (including the characters of the ancestors)
   */
  public List<String> subWords() {
   String prefix = getPrefix();
   List<String> result = new ArrayList<String>();
   for (String suffix : subWordsImpl()) {
    result.add(prefix + suffix);
   }
   return result;
  }

  /** @return whether a word ends at this node */
  public boolean isWord() {
   return type == TYPE_WORD;
  }

  /**
   * Indicate this node represents a valid word.
   */
  public void thisIsAWord() {
   type = TYPE_WORD;
  }

  /**
   * @return the word that ends at this node
   * @throws RuntimeException if no word ends at this node
   */
  public String getWord() {
   if (!isWord()) {
    throw new RuntimeException("Not a valid word.");
   }
   return getPrefix() + character;
  }

  /**
   * @return the characters of all ancestors, from the root down to the
   *         parent of this node (this node's own character is excluded)
   */
  private String getPrefix() {
   StringBuilder reversed = new StringBuilder();
   for (WordNode ancestor = parent; ancestor != null; ancestor = ancestor.parent) {
    if (ancestor.getCharacter() != null) {
     reversed.append(ancestor.getCharacter());
    }
   }
   // Collected bottom-up, so reverse to get root-to-parent order.
   return reversed.reverse().toString();
  }

  /**
   * @return the suffixes, each starting with this node's own character,
   *         of every word that ends at this node or below it
   */
  private List<String> subWordsImpl() {
   List<String> words = new ArrayList<String>();
   String own = convertToString(character);
   if (isWord()) {
    words.add(own);
   }
   for (WordNode child : childrenMap.values()) {
    if (child == null) {
     continue;
    }
    for (String childWord : child.subWordsImpl()) {
     words.add(own + childWord);
    }
   }
   return words;
  }

  /**
   * Registers {@code child} under its character and sets this node as its
   * parent. An existing child with the same character is replaced.
   */
  public void addChild(WordNode child) {
   child.parent = this;
   childrenMap.put(child.getCharacter(), child);
  }

  /** @return the character of this node, {@code null} for the root */
  public Character getCharacter() {
   return character;
  }

  /** @return the live (not copied) map of children keyed by character */
  public Map<Character, WordNode> getChildrenMap() {
   return childrenMap;
  }

  @Override
  public String toString() {
   return "WordNode [character=" + character + ", type=" + typeToString()
     + ", childrenMap.size=" + childrenMap.size() + "]";
  }

  /** @return the character as a string, or "" for {@code null} */
  private String convertToString(Character character) {
   return (character == null) ? "" : String.valueOf(character);
  }

  /** @return a readable name for the node type, "" if unknown */
  private String typeToString() {
   if (type == TYPE_ROOT) {
    return "ROOT";
   }
   if (type == TYPE_NON_WORD) {
    return "NOT_WORD";
   }
   if (type == TYPE_WORD) {
    return "WORD";
   }
   return "";
  }
 }
}
References:
TRIE data structure
http://stevedaskam.wordpress.com/2009/05/28/trie-structures/
http://www.technicalypto.com/2010/04/trie-in-java.html

Radix/PATRICIA Trie
Radix/PATRICIA Trie is a space-optimized trie data structure where each node with only one child is merged with its child.
This makes them much more efficient for small sets (especially if the strings are long) and for sets of strings that share long prefixes.
Java implementation
Post a Comment

Labels

Java (159) Lucene-Solr (110) All (58) Interview (58) J2SE (53) Algorithm (43) Soft Skills (36) Eclipse (34) Code Example (31) Linux (24) JavaScript (23) Spring (22) Windows (22) Web Development (20) Nutch2 (18) Tools (18) Bugs (17) Debug (15) Defects (14) Text Mining (14) J2EE (13) Network (13) PowerShell (11) Chrome (9) Design (9) How to (9) Learning code (9) Performance (9) UIMA (9) html (9) Dynamic Languages (8) Http Client (8) Maven (8) Security (8) Trouble Shooting (8) bat (8) blogger (8) Big Data (7) Continuous Integration (7) Google (7) Guava (7) JSON (7) Problem Solving (7) ANT (6) Coding Skills (6) Database (6) Scala (6) Shell (6) css (6) Algorithm Series (5) Cache (5) IDE (5) Lesson Learned (5) Programmer Skills (5) System Design (5) Tips (5) adsense (5) xml (5) AIX (4) Code Quality (4) GAE (4) Git (4) Good Programming Practices (4) Jackson (4) Memory Usage (4) Miscs (4) OpenNLP (4) Project Managment (4) Python (4) Spark (4) Testing (4) ads (4) regular-expression (4) Android (3) Apache Spark (3) Become a Better You (3) Concurrency (3) Eclipse RCP (3) English (3) Happy Hacking (3) IBM (3) J2SE Knowledge Series (3) JAX-RS (3) Jetty (3) Restful Web Service (3) Script (3) regex (3) seo (3) .Net (2) Android Studio (2) Apache (2) Apache Procrun (2) Architecture (2) Batch (2) Bit Operation (2) Build (2) Building Scalable Web Sites (2) C# (2) C/C++ (2) CSV (2) Career (2) Cassandra (2) Distributed (2) Fiddler (2) Firefox (2) Google Drive (2) Gson (2) Html Parser (2) Http (2) Image Tools (2) JQuery (2) Jersey (2) LDAP (2) Life (2) Logging (2) Software Issues (2) Storage (2) Text Search (2) xml parser (2) AOP (1) Application Design (1) AspectJ (1) Chrome DevTools (1) Cloud (1) Codility (1) Data Mining (1) Data Structure (1) ExceptionUtils (1) Exif (1) Feature Request (1) FindBugs (1) Greasemonkey (1) HTML5 (1) Httpd (1) I18N (1) IBM Java Thread Dump Analyzer (1) JDK Source Code (1) JDK8 (1) JMX (1) Lazy Developer (1) Mac (1) Machine Learning (1) Mobile (1) My Plan 
for 2010 (1) Netbeans (1) Notes (1) Operating System (1) Perl (1) Problems (1) Product Architecture (1) Programming Life (1) Quality (1) Redhat (1) Redis (1) Review (1) RxJava (1) Solutions logs (1) Team Management (1) Thread Dump Analyzer (1) Troubleshooting (1) Visualization (1) boilerpipe (1) htm (1) ongoing (1) procrun (1) rss (1)

Popular Posts