Lucene Built-in Collectors


TotalHitCountCollector
The collector's collect method is called once for each matching document.
The main methods involved in the process:
org.apache.lucene.search.IndexSearcher.search(List, Weight, Collector)
org.apache.lucene.search.Weight.DefaultBulkScorer.scoreAll(Collector, Scorer)

TopScoreDocCollector
Create collector:
org.apache.lucene.search.TopScoreDocCollector.create(int, ScoreDoc, boolean)

/**
 * Chooses the concrete TopScoreDocCollector for the given combination of
 * scoring order and paging.
 *
 * @param numHits passed through to the chosen collector
 * @param after when non-null, a paging collector variant is returned and
 *        {@code after} is passed to it
 * @param docsScoredInOrder selects the in-order variants when true and the
 *        out-of-order variants when false
 * @return one of the four collector implementations
 */
public static TopScoreDocCollector create(int numHits, ScoreDoc after, boolean docsScoredInOrder) {
  final boolean paging = after != null;

  if (docsScoredInOrder) {
    if (paging) {
      return new InOrderPagingScoreDocCollector(after, numHits);
    }
    return new InOrderTopScoreDocCollector(numHits);
  }

  if (paging) {
    return new OutOfOrderPagingScoreDocCollector(after, numHits);
  }
  return new OutOfOrderTopScoreDocCollector(numHits);
}
The collector puts docs into a HitQueue (a PriorityQueue):
org.apache.lucene.search.TopScoreDocCollector.OutOfOrderTopScoreDocCollector.collect(int)
org.apache.lucene.search.HitQueue.lessThan(ScoreDoc, ScoreDoc)
TopFieldCollector
/**
 * Chooses the concrete TopFieldCollector for the given sort and tracking flags.
 *
 * <p>A hit queue is always built first from {@code sort.fields} and
 * {@code numHits}. If {@code after} is non-null a PagingFieldCollector is
 * returned. Otherwise the implementation is picked by three criteria: whether
 * the queue has exactly one comparator, whether docs are scored in order, and
 * the tracking flags. {@code trackMaxScore} is checked before
 * {@code trackDocScores}, so it wins when both are set.
 *
 * @param sort sort whose {@code fields} are used to build the hit queue
 * @param numHits passed to the queue and to the chosen collector
 * @param after when non-null, selects the paging collector
 * @param fillFields passed through to the chosen collector
 * @param trackDocScores selects a "ScoringNoMaxScore" variant when
 *        {@code trackMaxScore} is false
 * @param trackMaxScore selects a "ScoringMaxScore" variant
 * @param docsScoredInOrder selects in-order variants when true, the
 *        "OutOfOrder" variants when false
 * @return the selected collector
 * @throws IOException declared by this factory
 */
public static TopFieldCollector create(Sort sort, int numHits, FieldDoc after,
    boolean fillFields, boolean trackDocScores, boolean trackMaxScore,
    boolean docsScoredInOrder)
    throws IOException {
  final FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);

  // Paging: a single implementation receives the tracking flags directly.
  if (after != null) {
    return new PagingFieldCollector(queue, after, numHits, fillFields, trackDocScores, trackMaxScore);
  }

  final boolean oneComparator = queue.getComparators().length == 1;

  // One comparator, docs scored in order.
  if (oneComparator && docsScoredInOrder) {
    if (trackMaxScore) {
      return new OneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
    }
    if (trackDocScores) {
      return new OneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
    }
    return new OneComparatorNonScoringCollector(queue, numHits, fillFields);
  }

  // One comparator, docs scored out of order.
  if (oneComparator) {
    if (trackMaxScore) {
      return new OutOfOrderOneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
    }
    if (trackDocScores) {
      return new OutOfOrderOneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
    }
    return new OutOfOrderOneComparatorNonScoringCollector(queue, numHits, fillFields);
  }

  // Multiple comparators, docs scored in order.
  if (docsScoredInOrder) {
    if (trackMaxScore) {
      return new MultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
    }
    if (trackDocScores) {
      return new MultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
    }
    return new MultiComparatorNonScoringCollector(queue, numHits, fillFields);
  }

  // Multiple comparators, docs scored out of order.
  if (trackMaxScore) {
    return new OutOfOrderMultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
  }
  if (trackDocScores) {
    return new OutOfOrderMultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
  }
  return new OutOfOrderMultiComparatorNonScoringCollector(queue, numHits, fillFields);
}
org.apache.lucene.search.FieldValueHitQueue
org.apache.lucene.search.FieldValueHitQueue.OneComparatorFieldValueHitQueue
org.apache.lucene.search.FieldValueHitQueue.MultiComparatorsFieldValueHitQueue

Test Lucene Built-in Collectors
/**
 * JUnit walkthrough of several Lucene collectors: TotalHitCountCollector,
 * TopScoreDocCollector, TopFieldCollector and the two-pass grouping
 * collectors. Every search uses the term query {@code title:java}.
 *
 * <p>NOTE(review): {@code printAndExplainSearchResult} is not defined in this
 * class; presumably it is a static import from a helper such as Utils — confirm.
 */
public class LearningCollector {

  /**
   * Calls {@code Utils.writeIndex()} before each test (presumably this builds
   * the index the searches below run against — confirm in Utils).
   */
  @Before
  public void setup() throws IOException {
    Utils.writeIndex();
  }

  /**
   * Opens the index under {@code Utils.INDEX_FOLDER_PATH} and runs each
   * collector example against one shared searcher. The directory and reader
   * are closed by try-with-resources.
   */
  @Test
  public void testBuiltCollector() throws IOException {
    final File indexFolder = new File(Utils.INDEX_FOLDER_PATH);
    try (Directory indexDir = FSDirectory.open(indexFolder);
        DirectoryReader reader = DirectoryReader.open(indexDir)) {
      final IndexSearcher searcher = new IndexSearcher(reader);

      usingTotalHitCollector(searcher);
      usingTopScoreDocCollector(searcher);
      usingTopFieldCollector(searcher);
      usingLuceneGroup(searcher);
    }
  }

  /** Builds the {@code title:java} term query shared by all examples. */
  private static TermQuery titleJavaQuery() {
    return new TermQuery(new Term("title", "java"));
  }

  /** Counts the matches with TotalHitCountCollector and prints the total. */
  private void usingTotalHitCollector(IndexSearcher searcher)
      throws IOException {
    final TotalHitCountCollector hitCounter = new TotalHitCountCollector();
    searcher.search(titleJavaQuery(), hitCounter);
    System.out.println("total hits:" + hitCounter.getTotalHits());
  }

  /** Collects the top 10 hits with TopScoreDocCollector and prints them. */
  private void usingTopScoreDocCollector(IndexSearcher searcher)
      throws IOException {
    final TermQuery query = titleJavaQuery();
    final TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, false);
    searcher.search(query, topDocs);
    printAndExplainSearchResult(searcher, topDocs, true, query, "title",
        "author");
    // TODO: add a searchAfter example
  }

  /**
   * Runs three sorted searches with TopFieldCollector: price descending with
   * all tracking flags off, price descending with them on, and price
   * descending followed by title ascending with them on.
   */
  private void usingTopFieldCollector(IndexSearcher searcher)
      throws IOException {
    final TermQuery query = titleJavaQuery();

    // reverse == true, i.e. sort by price descending
    final Sort byPriceDesc = new Sort(new SortField("price", Type.INT, true));

    // fillFields, trackDocScores and trackMaxScore all off
    final TopFieldCollector plain = TopFieldCollector.create(byPriceDesc, 10,
        false, false, false, false);
    searcher.search(query, plain);
    printAndExplainSearchResult(searcher, plain, true, query, "price");

    // fillFields, trackDocScores and trackMaxScore all on
    final TopFieldCollector tracking = TopFieldCollector.create(byPriceDesc,
        10, true, true, true, false);
    searcher.search(query, tracking);
    printAndExplainSearchResult(searcher, tracking, true, query, "price");

    // sort on more than one field
    final Sort byPriceThenTitle = new Sort(
        new SortField("price", Type.INT, true),
        new SortField("title", Type.STRING, false));
    final TopFieldCollector multiField = TopFieldCollector.create(
        byPriceThenTitle, 10, true, true, true, false);
    searcher.search(query, multiField);
    printAndExplainSearchResult(searcher, multiField, true, query, "price",
        "title");
  }

  /**
   * Groups the {@code title:java} matches by the title field, ordering groups
   * by title and the docs inside each group by price descending.
   */
  private void usingLuceneGroup(IndexSearcher searcher) throws IOException {
    final Sort groupSort = new Sort(new SortField("title", Type.STRING));
    final Sort docSort = new Sort(new SortField("price", Type.INT, true));
    groupBy(searcher, titleJavaQuery(), "title", groupSort, docSort);
  }

  /**
   * Two-pass grouping using TermFirstPassGroupingCollector,
   * TermSecondPassGroupingCollector, CachingCollector, TermAllGroupsCollector
   * and MultiCollector, printing the group count, hit counts and every
   * grouped document.
   *
   * @param searcher searcher the query runs against
   * @param query query whose matches are grouped
   * @param groupField field whose value defines a group
   * @param groupSort ordering of the groups
   * @param docSort ordering of the docs inside one group
   * @throws IOException if a search or a collector fails
   */
  private void groupBy(IndexSearcher searcher, Query query,
      String groupField, Sort groupSort, Sort docSort) throws IOException {
    // number of groups returned per page
    final int groupsPerPage = 10;
    final int firstGroup = 0;
    final boolean fillGroupFields = true;
    final int firstDocInGroup = 0;
    final boolean wantTotalGroupCount = true;

    // First pass: find the top groups, caching the hits for a later replay.
    final TermFirstPassGroupingCollector firstPass =
        new TermFirstPassGroupingCollector(groupField, groupSort, groupsPerPage);
    final boolean cacheScores = true;
    final double maxCacheRamMb = 16.0;
    final CachingCollector cache =
        CachingCollector.create(firstPass, cacheScores, maxCacheRamMb);
    searcher.search(query, cache);

    final Collection<SearchGroup<BytesRef>> topGroups =
        firstPass.getTopGroups(firstGroup, fillGroupFields);
    if (topGroups == null) {
      // no group matched, nothing to report
      return;
    }

    // Second pass: collect the docs belonging to each top group.
    final boolean withScores = true;
    final boolean withMaxScores = true;
    final boolean fillSortFields = true;
    final int docsPerGroup = 10;
    final TermSecondPassGroupingCollector secondPass =
        new TermSecondPassGroupingCollector(groupField, topGroups, groupSort,
            docSort, docsPerGroup, withScores, withMaxScores, fillSortFields);

    // The total group count is optional; when wanted, both collectors share
    // the second pass through a MultiCollector.
    final TermAllGroupsCollector allGroups =
        wantTotalGroupCount ? new TermAllGroupsCollector(groupField) : null;
    final Collector secondPassCollector = allGroups != null
        ? MultiCollector.wrap(secondPass, allGroups)
        : secondPass;

    if (cache.isCached()) {
      // the hits fit into maxCacheRamMb, so replay them
      cache.replay(secondPassCollector);
    } else {
      // the cache overflowed, so the query has to run again
      searcher.search(query, secondPassCollector);
    }

    final int totalGroupCount =
        allGroups != null ? allGroups.getGroupCount() : -1;
    System.out.println("groupCount: " + totalGroupCount);

    final TopGroups<BytesRef> groupsResult =
        secondPass.getTopGroups(firstDocInGroup);
    System.out.println("groupsResult.totalHitCount:"
        + groupsResult.totalHitCount);
    System.out.println("groupsResult.totalGroupedHitCount:"
        + groupsResult.totalGroupedHitCount);

    printGroups(searcher, groupsResult);
  }

  /**
   * Prints each group's value and total hits, then for each of its docs the
   * doc id with score and the stored document. Labels are numbered from 1.
   */
  private void printGroups(IndexSearcher searcher,
      TopGroups<BytesRef> groupsResult) throws IOException {
    final GroupDocs<BytesRef>[] groups = groupsResult.groups;
    for (int g = 0; g < groups.length; g++) {
      final GroupDocs<BytesRef> group = groups[g];
      final String groupLabel = "group[" + (g + 1) + "]";
      System.out.println(groupLabel + ":" + group.groupValue);
      System.out.println(groupLabel + ":" + group.totalHits);

      final ScoreDoc[] hits = group.scoreDocs;
      for (int d = 0; d < hits.length; d++) {
        final ScoreDoc hit = hits[d];
        final String docLabel = groupLabel + "[" + (d + 1) + "]";
        System.out.println(docLabel + ":" + hit.doc + "/" + hit.score);
        final Document stored = searcher.doc(hit.doc);
        System.out.println(docLabel + ":" + stored);
      }
    }
  }
}

Labels

adsense (5) Algorithm (69) Algorithm Series (35) Android (7) ANT (6) bat (8) Big Data (7) Blogger (14) Bugs (6) Cache (5) Chrome (19) Code Example (29) Code Quality (7) Coding Skills (5) Database (7) Debug (16) Design (5) Dev Tips (63) Eclipse (32) Git (5) Google (33) Guava (7) How to (9) Http Client (8) IDE (7) Interview (88) J2EE (13) J2SE (49) Java (186) JavaScript (27) JSON (7) Learning code (9) Lesson Learned (6) Linux (26) Lucene-Solr (112) Mac (10) Maven (8) Network (9) Nutch2 (18) Performance (9) PowerShell (11) Problem Solving (11) Programmer Skills (6) regex (5) Scala (6) Security (9) Soft Skills (38) Spring (22) System Design (11) Testing (7) Text Mining (14) Tips (17) Tools (24) Troubleshooting (29) UIMA (9) Web Development (19) Windows (21) xml (5)