In practice, we often want to highlight the matched words in the query response, so that the user can easily find the matched section and jump to it.
Highlighter in Solr
https://cwiki.apache.org/confluence/display/solr/Highlighting
http://wiki.apache.org/solr/HighlightingParameters
package org.lifelongprogrammer.learningLucene;
/**
 * Small runnable demo of the Lucene highlighter API.
 *
 * <p>It indexes two short documents into an in-memory directory, runs a
 * {@code TermQuery} for the term "love" and prints each hit twice: once
 * highlighted with a {@code SimpleHTMLFormatter} and a {@code SimpleFragmenter},
 * and once with the highlighter's default formatter and a
 * {@code SimpleSpanFragmenter}.
 *
 * <p>The import statements (Lucene classes and {@code java.io.IOException})
 * are not part of this listing.
 */
public class LuceneHighlighterInAction {

    /** Upper bound on the number of hits requested from the searcher and printed. */
    private static final int MAX_HITS = 10;

    /**
     * Builds the index, searches it and prints the highlighted hits to standard output.
     *
     * @param args ignored
     * @throws Exception if indexing, searching or highlighting fails
     */
    public static void main(String[] args) throws Exception {
        String fieldName = "content";
        // Directory and Analyzer are both Closeable; release them when the demo is done.
        try (Directory directory = new RAMDirectory();
                StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9)) {
            writeDocs(directory, analyzer, fieldName);

            try (DirectoryReader indexReader = DirectoryReader.open(directory)) {
                IndexSearcher searcher = new IndexSearcher(indexReader);
                TermQuery query = new TermQuery(new Term(fieldName, "love"));
                TopDocs topDocs = searcher.search(query, MAX_HITS);
                System.out.println("Total hits: " + topDocs.totalHits);
                // At most MAX_HITS entries, because that is the limit passed to search().
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;

                QueryScorer scorer = new QueryScorer(query);

                // use SimpleHTMLFormatter
                System.out.println("use SimpleHTMLFormatter");
                Highlighter highlighter = new Highlighter(
                        new SimpleHTMLFormatter("<font color='red'>", "</font>"), scorer);
                highlighter.setTextFragmenter(new SimpleFragmenter(200));
                for (ScoreDoc hit : scoreDocs) {
                    Document doc = searcher.doc(hit.doc);
                    String fieldContent = doc.get(fieldName);
                    System.out.println(fieldContent + " , " + hit.score);
                    System.out.println(
                            highlighter.getBestFragment(analyzer, fieldName, fieldContent));
                }

                // use SimpleSpanFragmenter
                System.out.println("use SimpleSpanFragmenter");
                highlighter = new Highlighter(scorer);
                // default is Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE 50*1024
                highlighter.setMaxDocCharsToAnalyze(10240);
                // The fragmenter only takes effect once it is installed on the highlighter,
                // and it has to share the scorer that the highlighter initializes for each
                // token stream; a separate QueryScorer would never be initialized.
                highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 10));
                for (ScoreDoc hit : scoreDocs) {
                    Document doc = searcher.doc(hit.doc);
                    String fieldContent = doc.get(fieldName);
                    System.out.println(fieldContent + " , " + hit.score);
                    TokenStream tokenStream = analyzer.tokenStream(fieldName, fieldContent);
                    // Ask for up to 2 fragments, joined by "...".
                    String result = highlighter.getBestFragments(
                            tokenStream, fieldContent, 2, "...");
                    System.out.println(result);
                }
            }
        }
    }

    /**
     * Writes the two sample documents into {@code directory}.
     *
     * <p>The field is indexed, stored and tokenized, and keeps term vectors with
     * positions and offsets.
     *
     * @param directory index location to write to
     * @param analyzer  analyzer used by the index writer
     * @param fieldName name of the single text field of each document
     * @throws IOException if the index cannot be written
     */
    private static void writeDocs(Directory directory,
            StandardAnalyzer analyzer, String fieldName) throws IOException {
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        try (IndexWriter writer = new IndexWriter(directory, config)) {
            FieldType fieldType = new FieldType();
            fieldType.setIndexed(true);
            // The text must be stored: main() reads it back with doc.get(fieldName).
            fieldType.setStored(true);
            fieldType.setTokenized(true);
            fieldType.setStoreTermVectors(true);
            fieldType.setStoreTermVectorOffsets(true);
            fieldType.setStoreTermVectorPositions(true);
            fieldType.setOmitNorms(false);
            fieldType.freeze();

            addDoc(writer, fieldName, fieldType,
                    "I am a lifelong programmer, I love coding; I am a lifelong programmer, I love programming.");
            addDoc(writer, fieldName, fieldType,
                    "I am a lifelong programmer, I love the world; I am a lifelong programmer, I love the life.");
        }
    }

    /** Adds one document holding {@code text} in the field {@code fieldName}. */
    private static void addDoc(IndexWriter writer, String fieldName,
            FieldType fieldType, String text) throws IOException {
        Document doc = new Document();
        doc.add(new Field(fieldName, text, fieldType));
        writer.addDocument(doc);
    }
}
Main code:
org.apache.lucene.search.highlight.Highlighter.getBestTextFragments(TokenStream, String, boolean, int)
Highlighter in Solr
https://cwiki.apache.org/confluence/display/solr/Highlighting
http://wiki.apache.org/solr/HighlightingParameters