Programmer: Lifelong Learning: January 2013

Solr: Writing Solr Test Case

It is important to write test cases to automate tests - even your company doesn't require to check in test cases, as it can save you a lot of time to rerun manual tests whenever you change something, also it's very common that you make some change, you think it is safe, so no need to test it, but in fact it breaks something. If you have automatic tests cases, you just need run your tests before check in your code.

This is especially important when you are changing Solr code, or extending Solr, as it's very time consuming to compile, package, deploy your change to server, and run manual tests.

We can use the test framework Solr provides to run tests in embedded jetty, all we need do is to write some test code, and run it every time we make some change or before we check in our code.

SolrTestCaseJ4 has many subclasses, you can pick one that best suits your need.
In our example, we will use SolrExampleTests to test our ExtNameExtractorProcessorFactory, which will guess file type from filepath, and store in a field.
Implementation
The test case code is like below: you can review the complete code at Github.

public class EmbededTestBase extends SolrExampleTests {
  @BeforeClass
  public static void beforeClass() throws Exception {
    String solrHome = "C:/jeffery/test-environment/solr-home";
    initCore("solrconfig.xml", "schema.xml", solrHome);
  }
}
public class ExtNameExtractorProcessorFactoryTest extends EmbededTestBase {
  private String chain = "extNameExtractorChain";
  
  @Test
  public void tesExtNameExtractorChain() throws Exception {
    SolrCore core = h.getCore();
    UpdateRequestProcessorChain chained = core
        .getUpdateProcessingChain(this.chain);
    ExtNameExtractorProcessorFactory factory = ((ExtNameExtractorProcessorFactory) chained
        .getFactories()[0]);
    factory.setEnabled(true);
    assertNotNull(chained);
    
    addDoc(adoc("id", "id123", "url", "c:\\dir\\abc.txt"));
    addDoc(commit());
    
    SolrQuery query = new SolrQuery("id:id123");
    SolrServer server = getSolrServer();
    QueryResponse rsp = server.query(query);
    
    NamedList<Object> namedList = rsp.getResponse();
    SolrDocumentList docList = (SolrDocumentList) namedList.get("response");
    
    String extName = (String) docList.get(0).getFieldValue("ext_name");
    assertEquals("txt", extName);
    
    assertNumFound("ext_name:txt", 1);
    // or assertQ("Verify one ext_name:txt", req("ext_name:txt"),
    // "//result[@numFound=1]");
    
    server.deleteByQuery("*:*");// delete everything!
    server.commit();
    assertNumFound("*:*", 0); // make sure it got in
    
    addDoc(adoc("id", "id234", "url", "efg.ppt"));
    addDoc(commit());
    
    assertNumFound("ext_name:ppt", 1);
    // assertQ("Verify one ext_name:ppt", req("ext_name:ppt"),
    // "//result[@numFound=1]");
    
    factory.setEnabled(false);
  }
  
  private void addDoc(String doc) throws Exception {
    Map<String,String[]> params = new HashMap<String,String[]>();
    MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
    params.put(UpdateParams.UPDATE_CHAIN, new String[] {chain});
    SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(),
        (SolrParams) mmparams) {};
    
    UpdateRequestHandler handler = new UpdateRequestHandler();
    handler.init(null);
    ArrayList<ContentStream> streams = new ArrayList<ContentStream>(2);
    streams.add(new ContentStreamBase.StringStream(doc));
    req.setContentStreams(streams);
    handler.handleRequestBody(req, new SolrQueryResponse());
    req.close();
  }
}

Test RequestHandler

public void testRequestHandler() throws Exception {
 clearIndex();

 CVAsyncXmlUpdateRequestHandler asyncXmlHandler = new CVAsyncXmlUpdateRequestHandler();
 NamedList<Object> args = new NamedList<Object>();
 args.add("unique_field_name", "contentid");
 asyncXmlHandler.init(args);

 SolrQueryRequest req;
 req = req("param1", "param1Value");

 String xmlDoc = adoc("field1", "f1value", "commit", "true");
 ArrayList<ContentStream> streams = new ArrayList<ContentStream>();
 streams.add(new ContentStreamBase.StringStream(xmlDoc));
 ((LocalSolrQueryRequest) req).setContentStreams(streams);
 SolrQueryResponse rsp = new SolrQueryResponse();

 asyncXmlHandler.handleRequestBody(req, rsp);
 req.close();

 assertNumFound("field1:f1value", 1);
 clearIndex();
}

Solr Notes All In One

Update CSV
http://wiki.apache.org/solr/UpdateCSV
/solr/update/csv?stream.file=file_path&separator=,&fieldnames=a_list&f.accesstime.map=0:&f.ctime_tdt.map=0:&f.mtm.map=0&skip=type&literal.type=type1
curl http://localhost:8983/solr/update/csv --data-binary @books.csv -H 'Content-type:text/plain; charset=utf-8'
SoftCommit
http://localhost:8080/solr/update?commit=true&softCommit=true
Update Data Via XML
Add Data
http://mailsearchsolr:8765/solr/update?commit=true&stream.body=<add><doc><field name="id">testdoc</field></doc></add>
Delete Data
host:post/solr/update?stream.body=<delete><query>*:*</query></delete>&commit=true
Set Proxy
When used with http monitor tools like fiddle, use -x to set proxy
curl -x localhost:8888 -X PUT -H Content-Type:application/json -d {\"type\":\"string\",\"stored\":\"false\",\"indexed\":\"true\"} "http://host:port/solr/schema/fields/newfield2"
About how to use Curl to import data to Solr, read Learning Solr Code: Import Data to Solr
-- Especailly how to add " or escape ".

View Source code of file
http://localhost:8080/solr/admin/file/?contentType=text/xml;charset=utf-8&file=schema.xml

SignatureUpdateProcessorFactory to generate unqiue string

<processor class="solr.processor.SignatureUpdateProcessorFactory">
 <bool name="enabled">true</bool>
 <str name="signatureField">id</str>
 <bool name="overwriteDupes">false</bool>
 <str name="fields">fields</str>
 <str name="signatureClass">solr.processor.Lookup3Signature</str>
</processor>

BKDRHash

public long BKDRHash(String str)
{
  long seed = 131; // 31 131 1313 13131 131313 etc..
  long hash = 0;
  for(int i = 0; i < str.length(); i++)
  {
     hash = (hash * seed) + str.charAt(i);
  }
  return (hash & 0x7FFFFFFF);
}

Analyzer and Filter
ReversedWildcardFilterFactory
Use this to support leading wildcard and prefix queries.

<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>

Code
Change NamedList to SolrParams
In init method, we should convert NamedList to SolrParams first, as SolrParams.get will search multiple paths including defaults settings.

NamedList args;
SolrParams params = SolrParams.toSolrParams(args);
Object obj = params.get(PARAM_MAX_THREAD);

Change SolrParams to NamedList

NamedList namedList = params.toNamedList();

Parse parameters in solrconfig or request
SolrParams params = SolrParams.toSolrParams(args);
List<String> fromFields = org.apache.solr.common.util.StrUtils.splitSmart(str, ",", true);
org.apache.solr.update.processor.SignatureUpdateProcessorFactory.inform(SolrCore)

final SchemaField field = core.getSchema().getFieldOrNull(getSignatureField());
if (null == field) {
  throw new SolrException
 (ErrorCode.SERVER_ERROR,
  "Can't use signatureField which does not exist in schema: "
  + getSignatureField());
}

Algorithms: Find Valid n-Characters-Word from m Characters Iteratively

I was asked to help solve the following word puzzle: find valid n-characters-word from m characters.

In the following example, The answer is combine.

I am not good at this kind of game. But I am a programmer, so I can write a program to do it.

I use open source English dictionary libraries:
WordNet http://wordnet.princeton.edu/wordnet/download/.
JWI: http://projects.csail.mit.edu/jwi/.
JWI is a Java library for interfacing with Wordnet.

First I will load all English words into memory, later, when we call dict.getIndexWord, it will directly search the word HashMap in the memory.

private void init(String dictFile) throws IOException, InterruptedException {
  dict = new RAMDictionary(new File(dictFile), ILoadPolicy.NO_LOAD);
  dict.open();
  dict.load(true);
 }

In previous post, I explained how to solve it with recursion, and the disadvantage of it: requires a lot of memory.

In this article, I solve this problem iteratively.
The main difference is that:
1. Get all combinations of n-characters-word from m characters: order is not important.
2. For every n-characters-word returned in step 1, using counting quickPerm algorithm to iterate each word in its permutations, if it is a valid word, save it to a list.

The code is like below:

public List getAllValidWords(String str, int len) {
  List result = new ArrayList<>();
  List nStrs = getNChars(str, len);

  for (String aStr : nStrs) {
   result.addAll(getValidWords(aStr));
  }

  return result;
 }

Another similar word game is to find the longest words from m characters.
To solve this, I call getAllValidWords(str, n), getAllValidWords(str, n-1) etc in multiple threads.

Implementation
The code is like below: you can review the complete code at Github.
The output is like below:
getAllValidWords took 755 mill seconds, found [combine, newcomb]
getMaxLenValidWord took 17601, len: 8 found [combine, newcomb]

package org.codeexample.algorithms.collected.wordpuzzle;
import edu.mit.jwi.RAMDictionary;
import edu.mit.jwi.data.ILoadPolicy;
import edu.mit.jwi.item.IIndexWord;
import edu.mit.jwi.item.POS;

public class WordPuzzleIteratively {
 private RAMDictionary dict;
 public WordPuzzleIteratively(String dictFile) throws IOException,
   InterruptedException {
  init(dictFile);
 }
 private void init(String dictFile) throws IOException, InterruptedException {
  dict = new RAMDictionary(new File(dictFile), ILoadPolicy.NO_LOAD);
  dict.open();
  dict.load(true);
 }
 public void close() {
  dict.close();
 }

 public List<String> getAllValidWords(String str, int len) {
  long start = new Date().getTime();
  List<String> result = new ArrayList<>();
  List<String> nStrs = getNChars(str, len);

  for (String aStr : nStrs) {
   result.addAll(getValidWords(aStr));
  }
  System.out.println("getAllValidWords took "
    + (new Date().getTime() - start) + " mill seconds, found " + result);
  return result;
 }

 public List<String> getMaxLenValidWordThreaded(final String str)
   throws MalformedURLException, IOException, InterruptedException {
  final ArrayList<ArrayList<String>> list = new ArrayList<ArrayList<String>>();
  if (str == null || str.length() == 0)
   return new ArrayList<>();
  long start = new Date().getTime();

  for (int len = str.length(); len > 0; len--) {
   list.add(new ArrayList<String>());
  }
  ExecutorService executor = Executors.newFixedThreadPool(str.length());
  for (int len = str.length(); len > 0; len--) {
   final int length = len;
   Runnable task = new Runnable() {
    @Override
    public void run() {

     long innerStart = new Date().getTime();
     ArrayList<String> result = (ArrayList<String>) getAllValidWords(str,
       length);
     System.out.println("getMaxLenValidWordThreaded len: " + length
       + " took " + (new Date().getTime() - innerStart) + ", found "
       + result.size() + " : " + result);

     list.set(length, result);
    }
   };
   executor.submit(task);
  }

  executor.shutdown();
  boolean terminated = executor.awaitTermination(Long.MAX_VALUE,
    TimeUnit.SECONDS);
  if (!terminated) {
   throw new RuntimeException("Request takes too much time");
  }

  List<String> result = new ArrayList<>();

  int index = str.length() - 1;
  for (; index >= 0; index--) {
   result = list.get(index);
   if (!result.isEmpty())
    break;
  }
  System.out.println("getMaxLenValidWord took "
    + (new Date().getTime() - start) + ", len: " + (index + 1) + " found "
    + result);
  return result;
 }

 public List<String> getMaxLenValidWord(String str)
   throws MalformedURLException, IOException {
  long start = new Date().getTime();
  List<String> list = new ArrayList<>();
  for (int len = str.length(); len > 0; len--) {
   long innerStart = new Date().getTime();
   list = getAllValidWords(str, len);
   System.out.println("getMaxLenValidWord len: " + len + " took "
     + (new Date().getTime() - innerStart));
   if (!list.isEmpty()) {
    break;
   }
  }
  System.out.println("getMaxLenValidWord took "
    + (new Date().getTime() - start));
  return list;
 }

 /**
  * Idea comes from book: Cracking code interview.
  */
 private List<String> getNChars(String strs, int len) {
  // for each char, it can be in the response or not, if
  List<String> nStrs = new ArrayList<String>();
  long max = 1 << strs.length();
  for (int i = 0; i < max; i++) {
   StringBuilder sb = new StringBuilder(len);
   int k = i;
   int index = 0;
   while (k > 0) {
    if ((k & 1) > 0) {
     sb.append(strs.charAt(index));
    }
    k >>= 1;
    index++;
   }

   if (sb.length() == len) {
    nStrs.add(sb.toString());
   }
  }
  return nStrs;
 }

 /**
  * Ideas comes from: The Counting QuickPerm Algorithm:
  * http://www.freewebs.com/permute/quickperm.html<br>
  * http://stackoverflow.com/questions/11915026/permutations-of-a-string-using-
  * iteration
  */
 private List<String> getValidWords(String str) {
  List<String> validWords = new ArrayList<>();
  if (isValidWorld(str)) {
   validWords.add(str);
  }
  char[] a = str.toCharArray();
  int n = a.length;
  int[] p = new int[n]; // Index control array initially all zeros
  int i = 1;
  while (i < n) {
   if (p[i] < i) {
    int j = ((i % 2) == 0) ? 0 : p[i];
    swap(a, i, j);
    String word = toString(a);
    if (isValidWorld(word)) {
     validWords.add(word);
    }
    p[i]++;
    i = 1;
   } else {
    p[i] = 0;
    i++;
   }
  }
  return validWords;
 }

 private static String toString(char[] a) {
  StringBuilder builder = new StringBuilder();
  builder.append(a);
  return builder.toString();
 }

 private static void swap(char[] a, int i, int j) {
  char temp = a[i];
  a[i] = a[j];
  a[j] = temp;
 }

 public static void main(String[] args) throws Exception {
  String str = "oivmwujbecn";
  int len = 7;
  WordPuzzleIteratively instance = new WordPuzzleIteratively(
    "C:/Program Files (x86)/WordNet/2.1/dict");

  List<String> list = null;
  // getAllValidWords took 755 mill seconds, found [combine, newcomb]
  list = instance.getAllValidWords(str, len);
  System.out.println(list.size());
  System.out.println(list);

  // getMaxLenValidWord took 17601, len: 8 found [combine, newcomb]
  list = instance.getMaxLenValidWordThreaded(str);
  System.out.println(list.size());
  System.out.println(list);
  
  instance.close();
 }

 private boolean isValidWorld(String word) {
  Boolean isWord = false;
  IIndexWord idxWord = dict.getIndexWord(word, POS.NOUN);
  if (idxWord != null && !idxWord.getWordIDs().isEmpty()) {
   isWord = true;
  }
  return isWord;
 }
}

Algorithms: Find Valid n-Characters-Word from m Characters Recursively

I was asked to help solve the following word puzzle: find valid n-characters-word from m characters.
In the following example, The answer is combine.

I am not good at this kind of game. But I am a programmer, so I can write a program to do it.
I use open source English dictionary libraries:
WordNet http://wordnet.princeton.edu/wordnet/download/.
JWI: http://projects.csail.mit.edu/jwi/.
JWI is a Java library for interfacing with Wordnet.

First I will load all English words into memory, later, when we call dict.getIndexWord, it will directly search the word HashMap in the memory.
First I will load all English words into memory, later, when we call dict.getIndexWord, it will directly search the word HashMap in the memory.

private void init(String dictFile) throws IOException, InterruptedException {
  dict = new RAMDictionary(new File(dictFile), ILoadPolicy.NO_LOAD);
  dict.open();
  dict.load(true);
 }

In this article, I solve this problem with recursion.
1. Get all combinations n-characters-word of m characters.
2. Iterative every word in the list returned in step 1, determine whether it is a valid word.

Another similar word game is to find the longest words from m characters.
To solve this, I tried get m-length word, then (m-1) length word, etc. The code is like below:

public List<String> getMaxLenValidWord(String str)
   throws MalformedURLException, IOException {
  List<String> list = new ArrayList<>();
  for (int len = str.length(); len > 0; len--) {
   list = getAllValidWords(str, len);
   if (!list.isEmpty()) {
    return list;
   }
  }
  return list;
 }

This is not very efficient, as it will stores all all combinations n-characters-word of m characters in memory, this is huge. When n and m is large, it will throw OOM error.

In later post , I will explain how to solve this problem iteratively.
Implementation
The code is like below: you can review the complete code at Github.
The output is like below:
getAllValidWords took 5095 mill seconds.
getMaxLenValidWord throws Exception in thread "main" java.lang.OutOfMemoryError: GC overhead limit exceeded

package org.codeexample.algorithms.collected.wordpuzzle;
import edu.mit.jwi.RAMDictionary;
import edu.mit.jwi.data.ILoadPolicy;
import edu.mit.jwi.item.IIndexWord;
import edu.mit.jwi.item.POS;
public class WordPuzzleWithRecusion {
 private RAMDictionary dict;
 public WordPuzzleWithRecusion(String dictFile) throws IOException,
   InterruptedException {
  init(dictFile);
 }
 private void init(String dictFile) throws IOException, InterruptedException {
  dict = new RAMDictionary(new File(dictFile), ILoadPolicy.NO_LOAD);
  dict.open();
  dict.load(true);
 }
 public void close() {
  dict.close();
 }
 
 public List<String> getAllValidWords(String str, int length)
   throws MalformedURLException, IOException {
  List<String> lists = new ArrayList<String>();
  long start = new Date().getTime();
  HashSet<String> result = getNCombination(str, length);
  System.out.println("getAllValidWords took "
    + (new Date().getTime() - start) + " mill seconds.");

  for (String word : result) {
   if (isValidWorld(word)) {
    lists.add(word);
   }
  }
  System.out.println("getAllValidWords took "
    + (new Date().getTime() - start) + " mill seconds.");
  return lists;
 }

 public List<String> getMaxLenValidWord(String str)
   throws MalformedURLException, IOException {
  long start = new Date().getTime();
  List<String> list = new ArrayList<>();
  for (int len = str.length(); len > 0; len--) {
   list = getAllValidWords(str, len);
   if (!list.isEmpty()) {
    return list;
   }
  }
  System.out.println("getMaxLenValidWord took "
    + (new Date().getTime() - start) + " mill seconds, found " + list);
  return list;
 }

 /**
  * Solve the problem using recursion, need a lot of memory.
  */
 private HashSet<String> getNCombination(String strs, int len) {
  HashSet<String> result = new HashSet<String>();
  if (strs == null) { // error case
   return result;
  }
  if (len == 0 || len > strs.length())
   return result;

  if (strs.length() == 1 && len == 1) { // base case
   result.add(strs);
   return result;
  }

  HashSet<String> result1 = getNCombination(strs.substring(1), len);

  result.addAll(result1);

  String firstStr = strs.substring(0, 1);
  if (firstStr.length() == len) {
   result.add(firstStr);
  } else {
   HashSet<String> result2 = getNCombination(strs.substring(1), len - 1);
   for (String tmp : result2) {
    for (int i = 0; i < tmp.length(); i++) {
     String tmpResult = tmp.substring(0, i) + firstStr + tmp.substring(i);
     result.add(tmpResult);
    }

    result.add(tmp + firstStr);
   }
  }

  return result;
 }

 public static void main(String[] args) throws Exception {
  String str = "oivmwujbecn";
  int length = 7;

  // getAllValidWords took 5095 mill seconds.
  WordPuzzleWithRecusion instance = new WordPuzzleWithRecusion(
    "C:/Program Files (x86)/WordNet/2.1/dict");
  System.out.println(instance.getAllValidWords(str, length));

  // This will throw OOM
  System.out.println(instance.getMaxLenValidWord(str));
  instance.close();
 }

 private boolean isValidWorld(String word) {
  Boolean isWord = false;
  IIndexWord idxWord = dict.getIndexWord(word, POS.NOUN);
  if (idxWord != null && !idxWord.getWordIDs().isEmpty()) {
   isWord = true;
  }
  return isWord;
 }
}

Ant: Different Ways to Call Another Target

AntCall Task
Call another target within the same buildfile optionally specifying some properties.

<target name="default">
  <antcall target="doSomethingElse">
    <param name="param1" value="value"/>
  </antcall>
</target>

Ant antfile
Call target in another file.

<ant antfile="${liteLucene-project-basedir}/shrink-solr.xml" target="shrink">
<property name="original.home" value="${main-build-path}\${application-name}" />
<property name="shrinked.home" value="${main-build-path}\shrinked-${application-name}" />
<property name="shrinked.tmp" value="${main-build-path}\shrinked-tmp" />
</ant>

Ant Include
Include another build file into the current project.

<include file="${parent-project-build-imp-xml}" as="parent"/>
<target name="package" depends="parent.init, parent.build-package, overwrite-solr-home, create-template, parent.post-clean"/>

How is <import> different from <include>?
The short version: Use import if you intend to override a target, otherwise use include.

Subant Task
Calls a given target for all defined sub-builds.
Example-Solr's build file:

<target name="compile" description="Compile Lucene and Solr">
    <sequential>
      <subant target="compile" inheritall="false" failonerror="true">
        <fileset dir="lucene" includes="build.xml" />
        <fileset dir="solr" includes="build.xml" />
      </subant>
    </sequential>
</target>

Solr: Extend StatsComponent to Support stats.query, stats.facet and facet.topn

Statistics is very useful and important to enterprise applications, but function of Solr StatsComponent is quite limited yet.
We can easily extend it to support stats.query or stats.ranges just like Solr facet components.
stats.query and stats.ranges
The idea is that to build a query array from parameter: if the parameter is: stats=true&stats.field=size&f.size.stats.ranges=0, 1024, 10240, the query array would be size:[0, 1024}, [1024, 10240}.

If the parameter is stats=true&stats.field=size&f.size.stats.query=size:[0, 1024}&f.size.stats.query=[1024, 10240}, then the query array would be size:[0, 1024}, [1024, 10240}.

Then iterate each query, get the docSet of the query, and get the intersection between it and the baseDoc., then run getStats on the intersection docSet.

QParser qparser = QParser.getParser(tmpQuery, "lucene",req);
SolrQueryParser parser = new SolrQueryParser(qparser,req.getSchema().getDefaultSearchFieldName());
Query query = parser.parse(tmpQuery);

DocSet docSet = searcher.getDocSet(query);
DocSet interSection = docs.intersection(docSet);
NamedList<Object> stv = (NamedList<Object>) uif.getStats(searcher, interSection, facets).getStatsValues();

Label stats.query
We can also specify a label for the query, for example: f.size.stats.query={!label= "Very Old"}mtm:[ 1980-01-01xxT12:00:00Z TO 2000-01-01T12:00:00Z]
Response would be like below:

<lst name="stats_fields">
            <lst name="size">
                        <lst name="Very Old"/>…
                        <lst name="Old"/>…
                        <lst name="Recent"/>…
            </lst>
</lst>

stats.facet.facet_field.topn and stats.facet.facet_field.sortfield
Another thing that we can improve is: when we specify stats.facet=file_type, there may be many file types, we just want to know the stats for the top 2 file types that takes most disk space.

To do this, we can support parameter stats.facet.facet_field.topn=N, also add parameter stats.facet.facet_field.sortfield=sortfield. sortfield can be sum, count, max, etc.
The implementation is simple, just get the topn element from the returned result:
Please refer how to implement it.

An example solr stats request:
http://localhost:8080/solr/select?q=file_type:pdf OR file_type:ppt OR file_type:txt OR file_type:html&rows=0&stats=true&stats.field=size&f.size.stats.query={!label= "Very Old"}mtm:[ 1980-01-01xxT12:00:00Z TO 2000-01-01T12:00:00Z]&f.size.stats.query={!label="Old"}mtms:[2000-01-01T12:00:00Z TO 2010-01-01T12:00:00Z]&f.size.stats.query={!label="Recent"}mtm:[2010-01-01T12:00:00Z TO 2013-01-01T12:00:00Z]&stats.facet=file_type&f.size.stats.facet.file_type.topn=2&f.size.stats.facet.file_type.topn=sum&stats.facet=user_name
Implementation
The code is like below: you can review the complete code at Github.

class SimpleStats {
 public NamedList<Object> getStatsFields() throws IOException {
    NamedList<Object> res = new SimpleOrderedMap<Object>();
    String[] statsFs = params.getParams(StatsParams.STATS_FIELD);
    boolean isShard = params.getBool(ShardParams.IS_SHARD, false);
    if (null != statsFs) {
    for (String fieldName : statsFs) {
     String[] facets = params.getFieldParams(fieldName,
       StatsParams.STATS_FACET);
     if (facets == null) {
      facets = new String[0]; // make sure it is something...
     }

     SchemaField sf = searcher.getSchema().getField(fieldName);
     FieldType ft = sf.getType();

     List<String> queries = new ArrayList<String>(), labeList = new ArrayList<String>();
     handleQueryParams(fieldName, queries, labeList);

     NamedList<Object> facetResult = new NamedList<Object>();
     String[] facetSortFields = new String[facets.length];
     Integer[] facetTopns = new Integer[facets.length];
     parseSortFieldsAndTopn(fieldName, facets, facetSortFields,
       facetTopns);

     // Currently, only UnInvertedField can deal with multi-part trie
     // fields
     String prefix = TrieField.getMainValuePrefix(ft);
     if (sf.multiValued() || ft.multiValuedFieldCache()
       || prefix != null || !queries.isEmpty()) {
      UnInvertedField uif = UnInvertedField.getUnInvertedField(
        fieldName, searcher);
      if (queries.isEmpty()) {
       facetResult = doGetStatsField(uif, docs, null, isShard,
         fieldName, facets, facetSortFields, facetTopns);

       res.add(facetResult.getName(0), facetResult.getVal(0));
      } else {
       handleMultipleQueries(uif, fieldName, queries,
         labeList, facets, facetSortFields, facetTopns,
         isShard, facetResult);
       res.add(fieldName, facetResult);
      }
     } else {
      NamedList<Object> stv = (NamedList<Object>) getFieldCacheStats(
        fieldName, facets);
      handleStatsTopns(facets, facetSortFields, facetTopns, stv);

      if (isShard || (Long) stv.get("count") > 0) {
       res.add(fieldName, stv);
      } else {
       res.add(fieldName, null);
      }
     }
    }
   }
    return res;
  }
  
  // why does this use a top-level field cache?
  public NamedList<?> getFieldCacheStats(String fieldName, String[] facet ) {
    SchemaField sf = searcher.getSchema().getField(fieldName);
    
    FieldCache.DocTermsIndex si;
    try {
      si = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), fieldName);
    } 
    catch (IOException e) {
      throw new RuntimeException( "failed to open field cache for: "+fieldName, e );
    }
    StatsValues allstats = StatsValuesFactory.createStatsValues(sf);
    final int nTerms = si.numOrd();
    if ( nTerms <= 0 || docs.size() <= 0 ) return allstats.getStatsValues();

    // don't worry about faceting if no documents match...
    List<FieldFacetStats> facetStats = new ArrayList<FieldFacetStats>();
    FieldCache.DocTermsIndex facetTermsIndex;
    for( String facetField : facet ) {
      SchemaField fsf = searcher.getSchema().getField(facetField);

      if ( fsf.multiValued()) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
          "Stats can only facet on single-valued fields, not: " + facetField );
      }

      try {
        facetTermsIndex = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), facetField);
      }
      catch (IOException e) {
        throw new RuntimeException( "failed to open field cache for: "
          + facetField, e );
      }
      facetStats.add(new FieldFacetStats(facetField, facetTermsIndex, sf, fsf, nTerms));
    }
    
    final BytesRef tempBR = new BytesRef();
    DocIterator iter = docs.iterator();
    while (iter.hasNext()) {
      int docID = iter.nextDoc();
      BytesRef raw = si.lookup(si.getOrd(docID), tempBR);
      if( raw.length > 0 ) {
        allstats.accumulate(raw);
      } else {
        allstats.missing();
      }

      // now update the facets
      for (FieldFacetStats f : facetStats) {
        f.facet(docID, raw);
      }
    }

    for (FieldFacetStats f : facetStats) {
      allstats.addFacet(f.name, f.facetStatsValues);
    }
    return allstats.getStatsValues();
  }

 private void parseSortFieldsAndTopn(String fieldName, String[] facets,
   String[] facetSortFields, Integer[] facetTopns) {
  for (int i = 0; i < facets.length; i++) {
   String facet = facets[i];
   String facetSortField = params.getFieldParam(fieldName,
     "stats.facet." + facet + ".sortfield");
   if (facetSortField != null && !facetSortField.trim().equals("")) {
    facetSortFields[i] = facetSortField;
   }
   String facetTopn = params.getFieldParam(fieldName, "stats.facet."
     + facet + ".topn");
   if (facetTopn != null) {
    facetTopns[i] = Integer.valueOf(facetTopn);
    if (facetSortField == null || facetSortField.trim().equals("")) {
     facetSortFields[i] = "sum";
    }
   }
  }
 }

 private void handleMultipleQueries(final UnInvertedField uif,
   final String filedName, final List<String> queries,
   List<String> labeList, final String[] facets,
   final String[] facetSortFields, final Integer[] facetTopns,
   final boolean isShard, NamedList<Object> facetResult) {
  final List<NamedList<Object>> tmpResult = new ArrayList<NamedList<Object>>();
  for (int i = 0; i < queries.size(); i++) {
   tmpResult.add(null);
  }
  List<Thread> threads = new ArrayList<Thread>();
  for (int i = 0; i < queries.size(); i++) {
   final int index = i;
   Thread thread = new Thread() {
    @Override
    public void run() {
     NamedList<Object> tmpFacetResult = null;
     String tmpQuery = queries.get(index);
     try {
      QParser qparser = QParser.getParser(tmpQuery, "lucene",
        req);
      SolrQueryParser parser = new SolrQueryParser(qparser,
        req.getSchema().getDefaultSearchFieldName());
      Query query = parser.parse(tmpQuery);

      DocSet docSet = searcher.getDocSet(query);
      DocSet interSection = docs.intersection(docSet);
      tmpFacetResult = doGetStatsField(uif, interSection,
        tmpQuery, isShard, filedName, facets,
        facetSortFields, facetTopns);
     } catch (Exception e) {
      tmpFacetResult = new NamedList<Object>();

      NamedList<Object> error = new NamedList<Object>();
      NamedList<Object> value = new NamedList<Object>();
      value.add("msg", e.getMessage());
      StringWriter stacks = new StringWriter();
      e.printStackTrace(new PrintWriter(stacks));
      value.add("trace", stacks.toString());

      error.add("error", value);
      tmpFacetResult.add(tmpQuery, error);
      throw new RuntimeException(e);
     } finally {
      tmpResult.set(index, tmpFacetResult);
     }
    }
   };
   thread.start();
   threads.add(thread);

  }

  for (Thread t : threads) {
   try {
    t.join();
   } catch (InterruptedException e) {
    throw new RuntimeException(e);
   }
  }

  for (int i = 0; i < tmpResult.size(); i++) {
   NamedList<Object> namedList = tmpResult.get(i);
   if (namedList != null && namedList.size() > 0) {
    // namedList should only have one result, maybe empty
    if (labeList.get(i) != null) {
     facetResult.add(labeList.get(i), namedList.getVal(0));
    } else {
     facetResult.add(namedList.getName(0), namedList.getVal(0));
    }
   }
  }
 }

 /**
  * Post condition: size of queries and labesList should be same.
  */
 private void handleQueryParams(final String filedName,
   List<String> queries, List<String> labesList) {
  // convert ranges to queries
  List<String> rangeQuries = handleRangeParmas(filedName);
  for (String query : rangeQuries) {
   queries.add(query);
   labesList.add(null);
  }
  // handle stats.query
  handleStatsQueryParams(filedName, queries, labesList);
 }

 private void handleStatsQueryParams(final String filedName,
   List<String> queries, List<String> labesList) {
  String[] queriesStr = params.getFieldParams(filedName, "stats.query");
  if (queriesStr != null) {
   // handle query label: example:stats.query={!label="Label Name"}qstr
   for (int i = 0; i < queriesStr.length; i++) {
    String oldQuery = queriesStr[i].trim();
    if (oldQuery.startsWith("{!")) {
     int endIndex = oldQuery.indexOf('}');
     if (endIndex < 0) {
      throw new RuntimeException(
        "Local parameter is not correct: " + oldQuery);
     }
     String label = queriesStr[i].substring(2, endIndex).trim();
     if (label.startsWith("label=")) {
      label = label.substring(6).trim();
      if (label.startsWith("\"") || label.startsWith("'")) {
       label = label.substring(1);
      }
      if (label.endsWith("\"") || label.endsWith("'")) {
       label = label.substring(0, label.length() - 1);
      }
      labesList.add(label);
     } else {
      labesList.add(null);
     }
     queries.add(oldQuery.substring(endIndex + 1).trim());
    } else {
     queries.add(oldQuery);
     labesList.add(null);
    }
   }
  }
 }

 private List<String> getRanges(String field, double start, double end,
   double[] gap) {
  List<Double> ranges = new ArrayList<Double>();
  if (gap == null || gap.length == 0) {
   ranges.add(start);
   ranges.add(end);
  } else {
   double boundEnd = start;
   int i = 0;
   int lastIndex = gap.length - 1;
   while (boundEnd <= end) {
    ranges.add(boundEnd);
    if (lastIndex < i) {
     boundEnd += gap[lastIndex];
    } else {
     boundEnd += gap[i];
    }
    i++;
   }
   if (ranges.get(ranges.size() - 1) < end) {
    ranges.add(end);
   }
  }
  List<String> result = new ArrayList<String>();

  FieldType fieldType = searcher.getSchema().getField(field).getType();
  String calssName = fieldType.getClass().toString();
  if (calssName.contains("LongField") || calssName.contains("IntField")
    || calssName.contains("ShortField")) {
   for (int i = 0; i < ranges.size(); i++) {
    result.add(String.valueOf(ranges.get(i).longValue()));
   }
  } else {
   for (int i = 0; i < ranges.size(); i++) {
    result.add(ranges.get(i).toString());
   }
  }
  return result;
 }

 /**
  * @return a list of query strings
  */
 private List<String> handleRangeParmas(final String statsField) {
  String startStr = params.getFieldParam(statsField, "stats.range.start");
  String endStr = params.getFieldParam(statsField, "stats.range.end");
  String gapStr = params.getFieldParam(statsField, "stats.range.gap");
  String rangesStr = params.getFieldParam(statsField, "stats.ranges");

  Double start = null, end = null;
  if (startStr != null) {
   start = Double.parseDouble(startStr);
  }
  if (endStr != null) {
   end = Double.parseDouble(endStr);
  }

  final List<String> queries = new ArrayList<String>();
  List<String> ranges = new ArrayList<String>();
  String fieldName = statsField;
  if (rangesStr != null) {
   int index = rangesStr.indexOf(":");
   if (index > 0) {
    fieldName = rangesStr.substring(0, index);
   }
   ranges = StrUtils.splitSmart(rangesStr.substring(index + 1), ',');
  } else if (gapStr != null) {
   List<String> strs = StrUtils.splitSmart(gapStr, ',');
   double[] gap = new double[strs.size()];
   for (int i = 0; i < strs.size(); i++) {
    gap[i] = Double.parseDouble(strs.get(i));
   }
   if (start != null && end != null & gap != null) {
    ranges = getRanges(statsField, start, end, gap);
   }
  }

  for (int i = 0; i < ranges.size() - 1; i++) {
   queries.add(fieldName + ":[" + ranges.get(i).trim() + " TO "
     + ranges.get(i + 1) + "}");
  }
  return queries;
 }

 /**
  * Call UnInvertedField to get stats and handle sortfields and topn.
  */
 @SuppressWarnings("unchecked")
 private NamedList<Object> doGetStatsField(UnInvertedField uif, DocSet docs,
   String lable, boolean isShard, String fieldName, String[] facets,
   String[] facetSortFields, Integer[] facetTopns) {
  try {
   NamedList<Object> res = new NamedList<Object>();
   NamedList<Object> stv = (NamedList<Object>) uif.getStats(searcher,
     docs, facets).getStatsValues();

   handleStatsTopns(facets, facetSortFields, facetTopns, stv);
   if (isShard || (Long) stv.get("count") > 0) {
    res.add(lable == null ? fieldName : lable, stv);
   } else {
    res.add(lable == null ? fieldName : lable, null);
   }
   return res;
  } catch (Exception e) {
   throw new RuntimeException(e);
  }
 }

  private void handleStatsTopns(String[] facets, String[] facetSortFields,
      Integer[] facetTopns, NamedList<Object> stv) {
    NamedList<NamedList<NamedList<Object>>> fieldFacet = (NamedList<NamedList<NamedList<Object>>>) (stv
        .get("facets"));
    NamedList<NamedList<NamedList<Object>>> topnFacets = new NamedList<NamedList<NamedList<Object>>>();

    boolean updated = false;

    for (int j = 0; j < facets.length; j++) {
      String sortBy = facetSortFields[j];
      Integer topn = facetTopns[j];
      NamedList<NamedList<Object>> fieldFacetList = (fieldFacet)
          .get(facets[j]);
      if (fieldFacetList == null) {
        continue;
      }
      if (topn == null) {
        if (sortBy == null) {
          topnFacets.add(facets[j], fieldFacetList);
          continue;
        }
        topn = fieldFacetList.size();
      }
      updated = true;

      NamedList<NamedList<Object>> namedList =  getStatsTopn(fieldFacetList, topn, sortBy);
      topnFacets.add(facets[j], namedList);
    }
    if (updated) {
      stv.remove("facets");
      stv.add("facets", topnFacets);
    }
  }
  
  private NamedList<NamedList<Object>> getStatsTopn(
      NamedList<NamedList<Object>> fieldFacetList, int topn, String sortBy) {
    Iterator<Map.Entry<String, NamedList<Object>>> iterator = fieldFacetList
        .iterator();
    TreeSetTopnAscendingByValue<String, Double> topnSet = new TreeSetTopnAscendingByValue<String, Double>(topn);
    while (iterator.hasNext()) {
      Map.Entry<String, NamedList<Object>> entry = iterator.next();
      String newLabel = entry.getKey();
      NamedList<?> namedList = entry.getValue();
      double newValue = Double.parseDouble(namedList.get(sortBy).toString());
      topnSet.add(newLabel, newValue);
    }

    NamedList<NamedList<Object>> namedList = new NamedList<NamedList<Object>>();
    Iterator<Entry<String, Double>>  keyIterator = topnSet.descendingIterator();
    while (keyIterator.hasNext()) {
      Entry<String, Double> entry = keyIterator.next();
      String label = entry.getKey();
      namedList.add(label, fieldFacetList.get(label));
    }
    return namedList;
  }

  static class TreeSetTopnAscendingByValue<K extends Comparable<? super K>, V extends Comparable<? super V>>
      extends TreeSet<Map.Entry<K, V>> {
    private static final long serialVersionUID = 1L;
    private int maxSize = Integer.MAX_VALUE;
    private Comparator<Map.Entry<K, V>> comparator;

    public TreeSetTopnAscendingByValue(Comparator<Map.Entry<K, V>> comparator,
        int maxSize) {
      super(comparator);
      this.comparator = comparator;
      this.maxSize = maxSize;
    }

    public TreeSetTopnAscendingByValue(int maxSize) {
      this(new Comparator<Map.Entry<K, V>>() {
        public int compare(Map.Entry<K, V> e1, Map.Entry<K, V> e2) {
          int res = e1.getValue().compareTo(e2.getValue());
          if (res == 0) {
            res = e1.getKey().compareTo(e2.getKey());
          }
          return res;
        }
      }, maxSize);
    }

    public boolean add(K newKey, V newValue) {
      return add(new SimpleEntry<K, V>(newKey, newValue));
    }

    @Override
    public boolean add(Entry<K, V> newValue) {
      boolean added = false;
      if (this.size() > maxSize)
        throw new RuntimeException("Should never happen.");
      if (this.size() == maxSize) {
        Iterator<Entry<K, V>> iterator = this.iterator();
        Entry<K, V> currentMin = iterator.next();
        // only remove currentMin if newValue > currentMin. if equals, do nothing
        // for better performance.
        if (comparator.compare(newValue, currentMin) > 0) {
          added = super.add(newValue);
          // the added element may already exist, if the map doesn't have this
          // element(added is true), remove currentMin
          if (added) {
            iterator = this.iterator();
            iterator.next();
            iterator.remove();
          }
        } 
      } else {
        added = super.add(newValue);
      }
      return added;
    }
  }
}

Resource:
http://wiki.apache.org/solr/StatsComponent
Solr StatsComponent
https://issues.apache.org/jira/browse/SOLR-4239

Extend Guava TreeMultimap to Get Topn elements Sorted by Values

It is a common requirement that we have a Hashmap, we want only return the topn elements sorted on the values, for example, in the following HashMap, the key is username, the value is user's score. We want to return top 2 users with highest scores.
David->85, Jeffery->85, Tom->90, Vivian->95

In previous article, I extend TreeSet to add <key, value> pair as Map.Entry, and sort it by value, and override add method to limit TreeSet size.

It works well, and with good time and space complexity. But one of my colleague argued that: in a map, we should sort by key not by value, so the key should be score, the value should be user name, the data structure should be sorted multi-value map: there can be multiple values for a same key, and it's sorted by key.

Of course this works, but the performance would be a little worse, as for a same key, there can be multiple values, we have to iterate the value list.

Neither am I good at arguing, nor like arguing. However arguing and negotiation are important skills for a software engineer: they are a part of our life, so I should try my best to improve these skills.
I think my solution is better, but I can't clearly express why at that time.
Less Learned: when compare 2 implementations, we should compare: time/space complexity, implementation complexity.

Anyway, I would like to implement my colleague's idea. Just curious, and want to know how to implement a sorted multi-value TreeMap, and limit its size.

Google Guava provides TreeMultimap: there can be multiple values for a same key, and it's sorted by keys.

We only need extend put method to limit its size. It's easy, but the bad part is that TreeMultimap's constructor is package level, to extend it, the subclass must be in the same package.

The code is like below: you can review the complete code at Github.

public class TopnTreeMultimap<K extends Comparable, V extends Comparable>
  extends TreeMultimap<K, V> {
 private final boolean DEBUG = true;
 private static final long serialVersionUID = 1L;

 private int maxSize;

 public TopnTreeMultimap(Comparator<? super K> keyComparator,
   Comparator<? super V> valueComparator, int maxSize) {
  super(keyComparator, valueComparator);
  this.maxSize = maxSize;
 }

 public static <K extends Comparable, V extends Comparable> TopnTreeMultimap<K, V> create(
   int maxSize) {
  return new TopnTreeMultimap<K, V>(Ordering.natural(), Ordering.natural(),
    maxSize);
 }

 @SuppressWarnings("unchecked")
 @Override
 public boolean put(K newKey, V newValue) {
  boolean added = false;
  if (this.size() > maxSize)
   throw new RuntimeException("Code error, should never happen.");
  if (this.size() == maxSize) {
   Iterator<K> keyIterator = this.keySet().iterator();
   K currentMinKey = keyIterator.next();
   if (newKey.compareTo(currentMinKey) <= 0) {
    if (DEBUG) {
     System.out.println("Ignore the put element: " + newKey + " : "
       + newValue);
    }
   } else {
    added = super.put(newKey, newValue);

    if (added) {
     // remove the first element - the min
     Iterator entryiIterator = this.entryIterator();
     entryiIterator.next();
     entryiIterator.remove();
    }
   }
  } else {
   added = super.put(newKey, newValue);
  }
  return added;
 }

 public static void main(String[] args) throws Exception {
  TopnTreeMultimap<Integer, String> instance = TopnTreeMultimap.create(2);
  instance.put(85, "David");
  instance.put(85, "Jeffery");
  instance.put(90, "Tom");
  instance.put(95, "Vivian");
  instance.put(95, "Vivian");

  System.err.println(instance);
  // print {90=[Tom], 95=[Vivian]}
 }
}

Java: Get HashMap's Topn elements Sorted by Values

It is a common requirement that we have a Hashmap, we want to only return the topn elements sorted by the value.
For example, in the following HashMap, the key is username, the value is user's score. We want to return top 2 users with highest scores.
David->85, Jeffery->85, Tom->90, Vivian->95

We may consider using TreeMap, but it is sorted by the key not the value.

In this article, I will construct a new TreeSet subclass, its element is Entry: a key-value pair, we will sort it by value via a custom Comparator:

new Comparator<Map.Entry<K, V>>() {
 public int compare(Map.Entry<K, V> e1, Map.Entry<K, V> e2) {
  int res = e1.getValue().compareTo(e2.getValue());
  if (res == 0) {
   res = e1.getKey().compareTo(e2.getKey());
  }
  return res;
 }
};

We will limit its size to n, by override add method: if its size is less than n, add the entry, if not:
1. If the value of the new entry is less than the minimum-value element(the first element in the TreeSet), then discard the new entry.
2. If the value of the new entry is larger than the first element in the TreeSet, remove the first element, and add the new entry.

The code is like below. You can review the complete source code at Github:

class TreeSetTopnAscendingByValue<K extends Comparable<? super K>, V extends Comparable<? super V>>
 extends TreeSet<Map.Entry<K, V>> {
private static final boolean DEBUG = false;
private static final long serialVersionUID = 1L;

private int maxSize = Integer.MAX_VALUE;
private Comparator<Map.Entry<K, V>> comparator;

public TreeSetTopnAscendingByValue(Comparator<Map.Entry<K, V>> comparator,
  int maxSize) {
 super(comparator);
 this.comparator = comparator;
 this.maxSize = maxSize;
}

public TreeSetTopnAscendingByValue(int maxSize) {
 this(new Comparator<Map.Entry<K, V>>() {
  public int compare(Map.Entry<K, V> e1, Map.Entry<K, V> e2) {
   int res = e1.getValue().compareTo(e2.getValue());
   if (res == 0) {
    res = e1.getKey().compareTo(e2.getKey());
   }
   return res;
  }
 }, maxSize);
}

public boolean add(K newKey, V newValue) {
 return add(new SimpleEntry<K, V>(newKey, newValue));
}

public boolean add(IKeyValue<K, V> obj) {
 return add(new SimpleEntry<K, V>(obj.getKey(), obj.getValue()));
}

@Override
public boolean add(Entry<K, V> newValue) {
 boolean added = false;
 if (this.size() > maxSize)
  throw new RuntimeException("Code error, should never happen.");
 if (this.size() == maxSize) {
  // we can first check whether the map already contains this element, if
  // yes, ignore.
  // or we can first add, then if the size is increased, remove the first
  // element again, maybe the latter is better : we can run test to compare
  // it.
  Iterator<Entry<K, V>> iterator = this.iterator();
  Entry<K, V> currentMin = iterator.next();
  // only remove currentMin if newValue > currentMin. if equals, do nothing
  // for better performance.
  if (comparator.compare(newValue, currentMin) > 0) {
   added = super.add(newValue);
   if (DEBUG) {
    System.out.println("size: " + this.size() + ", add "
      + newValue.toString() + ", remove: " + currentMin);
   }
   // the added element may already exist, if the map doesn't have this
   // element(added is true), remove currentMin
   // or we can us this.size() > maxSize
   if (added) {
    iterator = this.iterator();
    iterator.next();
    iterator.remove();
   }
  } else {
   // don't add newValue;
   if (DEBUG) {
    System.out.println("size: " + this.size() + ", Skip "
      + newValue.toString());
   }
  }
 } else {
  added = super.add(newValue);
 }
 return added;
}


interface IKeyValue<K, V> {
 public K getKey();

 public V getValue();
}

The test code is like below:

public class HashMapSorterByValue {
// http://stackoverflow.com/questions/2864840/treemap-sort-by-value
public static <K extends Comparable<? super K>, V extends Comparable<? super V>> SortedSet<Map.Entry<K, V>> getTopnbyValue(
  Map<K, V> map, int topn) {
 TreeSetTopnAscendingByValue<K, V> sortedEntries = new TreeSetTopnAscendingByValue<K, V>(
   topn);
 sortedEntries.addAll(map.entrySet());
 return sortedEntries;
}

private static void testSortByValues() {
 TreeSetTopnAscendingByValue<String, Integer> sortedEntries = new TreeSetTopnAscendingByValue<String, Integer>(
   3);
 sortedEntries.add("David", 85);
 sortedEntries.add("Jeffery", 85);
 sortedEntries.add("Tom", 90);
 sortedEntries.add("Vivian", 95);
 sortedEntries.add("Jacky", 95);
 sortedEntries.add("Jacky", 95);

 System.out.println(sortedEntries);

 // test iterate
 Iterator<Entry<String, Integer>> iterator = sortedEntries.iterator();
 while (iterator.hasNext()) {
  Entry<String, Integer> entry = iterator.next();
  System.out.println(entry.getKey() + " : " + entry.getValue());
 }
 sortedEntries.remove(new SimpleEntry<String, Integer>("Jacky", 95));
 System.out.println(sortedEntries);
}

private static void testSrotedByPeopleScore() {
 TreeSetTopnAscendingByValue<String, Double> sortedEntries = new TreeSetTopnAscendingByValue<String, Double>(
   2);
 Person p = new Person("David", 85);
 sortedEntries.add(p);
 p = new Person("Jeffery", 85);
 sortedEntries.add(p);
 p = new Person("Tom", 90);
 sortedEntries.add(p);
 p = new Person("Vivian", 95);
 sortedEntries.add(p);

 assertEquals(2, sortedEntries.size());
 Iterator<Entry<String, Double>> iterator = sortedEntries.iterator();
 assertEquals(90.0, iterator.next().getValue());
 assertEquals(95.0, iterator.next().getValue());
 System.out.println(sortedEntries);
}

class Person implements IKeyValue<String, Double> {
 private String name;
 private double score;

 public Person(String name, double score) {
  this.name = name;
  this.score = score;
 }

 public String getName() {
  return name;
 }

 public String getKey() {
  return getName();
 }

 public Double getValue() {
  return score;
 }

 @Override
 public String toString() {
  return name + ":" + score;
 }
}

Labels