TotalHitCountCollector
The collector's collect method is called once for each matching document.
The main methods in the process:
org.apache.lucene.search.IndexSearcher.search(List, Weight, Collector)
org.apache.lucene.search.Weight.DefaultBulkScorer.scoreAll(Collector, Scorer)
TopScoreDocCollector
Create collector:
org.apache.lucene.search.TopScoreDocCollector.create(int, ScoreDoc, boolean)
/**
 * Factory that selects one of four TopScoreDocCollector implementations.
 *
 * The choice depends on two inputs: whether documents are scored in order
 * (docsScoredInOrder) and whether a previous page's last hit (after) was given.
 *
 * @param numHits           passed to the chosen collector's constructor
 * @param after             when non-null, a paging collector is created with it;
 *                          when null, a plain top-score collector is created
 * @param docsScoredInOrder selects the in-order or the out-of-order family
 * @return the collector implementation matching the two switches
 */
public static TopScoreDocCollector create(int numHits, ScoreDoc after, boolean docsScoredInOrder) {
if (docsScoredInOrder) {
// In-order family: plain collector without "after", paging collector with it.
return after == null
? new InOrderTopScoreDocCollector(numHits)
: new InOrderPagingScoreDocCollector(after, numHits);
} else {
// Out-of-order family: same split on "after".
return after == null
? new OutOfOrderTopScoreDocCollector(numHits)
: new OutOfOrderPagingScoreDocCollector(after, numHits);
}
}
The collector puts docs into a HitQueue (a PriorityQueue):
org.apache.lucene.search.TopScoreDocCollector.OutOfOrderTopScoreDocCollector.collect(int)
org.apache.lucene.search.HitQueue.lessThan(ScoreDoc, ScoreDoc)
TopFieldCollector
/**
 * Factory that selects the TopFieldCollector implementation for a sorted search.
 *
 * A FieldValueHitQueue is first built from sort.fields and numHits. If "after"
 * is non-null a PagingFieldCollector is returned. Otherwise one of twelve
 * specialised collectors is chosen from three switches:
 * - the number of comparators in the queue (exactly one vs. anything else),
 * - docsScoredInOrder (in-order vs. OutOfOrder* classes),
 * - score tracking: trackMaxScore is tested first, then trackDocScores,
 *   otherwise a NonScoring collector is used.
 *
 * @param sort              its fields are used to create the hit queue
 * @param numHits           passed to the queue and to the chosen collector
 * @param after             when non-null, selects PagingFieldCollector
 * @param fillFields        forwarded to the chosen collector
 * @param trackDocScores    selects a ScoringNoMaxScore collector when
 *                          trackMaxScore is false
 * @param trackMaxScore     selects a ScoringMaxScore collector; takes
 *                          precedence over trackDocScores
 * @param docsScoredInOrder selects the in-order or out-of-order family
 * @return the collector implementation matching the switches above
 * @throws IOException declared by this factory
 */
public static TopFieldCollector create(Sort sort, int numHits, FieldDoc after,
boolean fillFields, boolean trackDocScores, boolean trackMaxScore,
boolean docsScoredInOrder)
throws IOException {
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
if (after == null) {
// Not paging: pick a specialised collector.
if (queue.getComparators().length == 1) {
// Single comparator: every branch below returns, so nothing falls through.
if (docsScoredInOrder) {
if (trackMaxScore) {
return new OneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
} else if (trackDocScores) {
return new OneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
} else {
return new OneComparatorNonScoringCollector(queue, numHits, fillFields);
}
} else {
if (trackMaxScore) {
return new OutOfOrderOneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
} else if (trackDocScores) {
return new OutOfOrderOneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
} else {
return new OutOfOrderOneComparatorNonScoringCollector(queue, numHits, fillFields);
}
}
}
// multiple comparators.
// Reached only when the comparator count is not exactly one.
if (docsScoredInOrder) {
if (trackMaxScore) {
return new MultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
} else if (trackDocScores) {
return new MultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
} else {
return new MultiComparatorNonScoringCollector(queue, numHits, fillFields);
}
} else {
if (trackMaxScore) {
return new OutOfOrderMultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
} else if (trackDocScores) {
return new OutOfOrderMultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
} else {
return new OutOfOrderMultiComparatorNonScoringCollector(queue, numHits, fillFields);
}
}
} else {
// Paging: one collector class handles every combination; the score-tracking
// flags are passed to it as arguments, and docsScoredInOrder is not passed.
return new PagingFieldCollector(queue, after, numHits, fillFields, trackDocScores, trackMaxScore);
}
}
org.apache.lucene.search.FieldValueHitQueue
org.apache.lucene.search.FieldValueHitQueue.OneComparatorFieldValueHitQueue
org.apache.lucene.search.FieldValueHitQueue.MultiComparatorsFieldValueHitQueue
Test Lucene Built-in Collectors
/**
 * Learning test that runs the same term query ({@code title:java}) through
 * several of Lucene's built-in collectors and prints what each one gathers.
 */
public class LearningCollector {

    /** Calls {@code Utils.writeIndex()} before each test. */
    @Before
    public void setup() throws IOException {
        Utils.writeIndex();
    }

    /**
     * Opens the index under {@code Utils.INDEX_FOLDER_PATH} and runs every
     * collector demo against a single shared searcher.
     *
     * @throws IOException if opening or searching the index fails
     */
    @Test
    public void testBuiltCollector() throws IOException {
        File indexFolder = new File(Utils.INDEX_FOLDER_PATH);
        try (Directory dir = FSDirectory.open(indexFolder);
                DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            usingTotalHitCollector(searcher);
            usingTopScoreDocCollector(searcher);
            usingTopFieldCollector(searcher);
            usingLuceneGroup(searcher);
        }
    }

    /** Counts the matches with a TotalHitCountCollector and prints the total. */
    private void usingTotalHitCollector(IndexSearcher searcher)
            throws IOException {
        TermQuery javaTitles = new TermQuery(new Term("title", "java"));
        TotalHitCountCollector counter = new TotalHitCountCollector();
        searcher.search(javaTitles, counter);
        System.out.println("total hits:" + counter.getTotalHits());
    }

    /**
     * Groups the matches by the "title" field: groups are sorted by title,
     * documents inside a group by price (reverse flag set).
     */
    private void usingLuceneGroup(IndexSearcher searcher) throws IOException {
        TermQuery javaTitles = new TermQuery(new Term("title", "java"));
        Sort byTitle = new Sort(new SortField("title", Type.STRING));
        Sort byPriceReversed = new Sort(new SortField("price", Type.INT, true));
        groupBy(searcher, javaTitles, "title", byTitle, byPriceReversed);
    }

    /**
     * Two-pass grouping built from TermFirstPassGroupingCollector,
     * TermSecondPassGroupingCollector, CachingCollector,
     * TermAllGroupsCollector and MultiCollector. Prints the group count, the
     * hit counts and every document of every returned group.
     *
     * @param searcher   searcher to run the query on
     * @param query      query whose matches are grouped
     * @param groupField field to group by
     * @param groupSort  ordering of the groups
     * @param docSort    ordering of the documents inside each group
     * @throws IOException if searching or loading a document fails
     */
    private void groupBy(IndexSearcher searcher, Query query,
            String groupField, Sort groupSort, Sort docSort) throws IOException {
        // Paging and caching settings for this demo.
        int groupsPerPage = 10;
        int firstGroup = 0;
        int firstDocInGroup = 0;
        int docsPerGroup = 10;
        boolean fillFields = true;
        boolean wantTotalGroupCount = true;
        boolean cacheScores = true;
        double cacheLimitMb = 16.0;

        // First pass: find the top groups, recording hits in the cache on the way.
        TermFirstPassGroupingCollector firstPass = new TermFirstPassGroupingCollector(
                groupField, groupSort, groupsPerPage);
        CachingCollector cache = CachingCollector.create(firstPass,
                cacheScores, cacheLimitMb);
        searcher.search(query, cache);

        Collection<SearchGroup<BytesRef>> topGroups = firstPass.getTopGroups(
                firstGroup, fillFields);
        if (topGroups == null) {
            // Nothing matched, so there is nothing to group.
            return;
        }

        // Second pass: collect the documents of each top group.
        boolean withScores = true;
        boolean withMaxScores = true;
        boolean withSortFields = true;
        TermSecondPassGroupingCollector secondPass = new TermSecondPassGroupingCollector(
                groupField, topGroups, groupSort, docSort, docsPerGroup,
                withScores, withMaxScores, withSortFields);

        // The total group count is optional; when wanted, both collectors
        // are driven together through a MultiCollector.
        TermAllGroupsCollector allGroups = wantTotalGroupCount
                ? new TermAllGroupsCollector(groupField)
                : null;
        Collector secondPassChain = allGroups != null
                ? MultiCollector.wrap(secondPass, allGroups)
                : secondPass;

        if (!cache.isCached()) {
            // The cache overflowed its RAM limit, so the query has to run again.
            searcher.search(query, secondPassChain);
        } else {
            // Everything fit in the cache: replay it instead of searching again.
            cache.replay(secondPassChain);
        }

        int totalGroupCount = allGroups != null ? allGroups.getGroupCount() : -1;
        System.out.println("groupCount: " + totalGroupCount);

        TopGroups<BytesRef> grouped = secondPass.getTopGroups(firstDocInGroup);
        int hitCount = grouped.totalHitCount;
        int groupedHitCount = grouped.totalGroupedHitCount;
        System.out.println("groupsResult.totalHitCount:" + hitCount);
        System.out.println("groupsResult.totalGroupedHitCount:"
                + groupedHitCount);

        int groupNo = 0;
        for (GroupDocs<BytesRef> group : grouped.groups) {
            groupNo++;
            printGroup(searcher, group, groupNo);
        }
    }

    /**
     * Prints one group: its value, its hit count, then doc id/score and the
     * stored document for each hit. Numbering is 1-based.
     */
    private void printGroup(IndexSearcher searcher, GroupDocs<BytesRef> group,
            int groupNo) throws IOException {
        String groupLabel = "group[" + groupNo + "]:";
        System.out.println(groupLabel + group.groupValue);
        System.out.println(groupLabel + group.totalHits);

        int docNo = 0;
        for (ScoreDoc hit : group.scoreDocs) {
            docNo++;
            String docLabel = "group[" + groupNo + "][" + docNo + "]:";
            System.out.println(docLabel + hit.doc + "/" + hit.score);
            Document stored = searcher.doc(hit.doc);
            System.out.println(docLabel + stored);
        }
    }

    /**
     * Runs three sorted searches with TopFieldCollector: price only with all
     * tracking flags off, price only with fillFields/trackDocScores/
     * trackMaxScore on, and price then title with the flags on.
     */
    private void usingTopFieldCollector(IndexSearcher searcher)
            throws IOException {
        TermQuery javaTitles = new TermQuery(new Term("title", "java"));

        // The third SortField argument is the reverse flag: sort=price desc.
        Sort byPrice = new Sort(new SortField("price", Type.INT, true));
        TopFieldCollector bare = TopFieldCollector.create(byPrice, 10, false,
                false, false, false);
        searcher.search(javaTitles, bare);
        printAndExplainSearchResult(searcher, bare, true, javaTitles, "price");

        // Same sort, now with fillFields, trackDocScores and trackMaxScore on.
        TopFieldCollector tracking = TopFieldCollector.create(byPrice, 10,
                true, true, true, false);
        searcher.search(javaTitles, tracking);
        printAndExplainSearchResult(searcher, tracking, true, javaTitles,
                "price");

        // Two sort fields: price (reversed) first, then title.
        Sort byPriceThenTitle = new Sort(
                new SortField("price", Type.INT, true),
                new SortField("title", Type.STRING, false));
        TopFieldCollector multiField = TopFieldCollector.create(
                byPriceThenTitle, 10, true, true, true, false);
        searcher.search(javaTitles, multiField);
        printAndExplainSearchResult(searcher, multiField, true, javaTitles,
                "price", "title");
    }

    /** Collects the top 10 hits by score with TopScoreDocCollector. */
    private void usingTopScoreDocCollector(IndexSearcher searcher)
            throws IOException {
        TermQuery javaTitles = new TermQuery(new Term("title", "java"));
        TopScoreDocCollector topTen = TopScoreDocCollector.create(10, false);
        searcher.search(javaTitles, topTen);
        printAndExplainSearchResult(searcher, topTen, true, javaTitles,
                "title", "author");
        // TODO: add a searchAfter example
    }
}