Java APIs to Build Solr Suggester and Get Suggestion


Use Case
Usually we provide REST APIs to manage Solr, and the same goes for the suggester.
This article focuses on how to programmatically build a Solr suggester and get suggestions using Java code.

The implementation
Please check the end of the article for Solr configuration files.

Build Suggester
In Solr, after we add docs, we call suggest?suggest.build=true to build the suggester so that the new docs become available for autocompletion.

The only trick here is the suggest.build request doesn't build suggester for all cores in the collection, BUT only builds suggester to the core that receives the request.

We need to get the URLs of all replicas of the collection, add them to the shards parameter, and also add shards.qt=/suggest:
shards=127.0.0.1:4567/solr/myCollection_shard1_replica3,127.0.0.1:4565/solr/myCollection_shard1_replica2,127.0.0.1:4566/solr/myCollection_shard1_replica1,127.0.0.1:4567/solr/myCollection_shard2_replica3,127.0.0.1:4566/solr/myCollection_shard2_replica1/,127.0.0.1:4565/solr/myCollection_shard2_replica2&shards.qt=/suggest

/**
 * Sends a {@code suggest.build=true} request to the {@code /suggest} handler, listing every
 * core URL of the collection in the {@code shards} parameter so that each core receives the
 * build request.
 *
 * @throws BusinessException if the request fails or the response status is 300 or higher
 */
public void buildSuggester() {
    final SolrQuery buildRequest = new SolrQuery();
    final List<String> coreUrls = getAllSolrCoreUrls(getSolrClient());

    buildRequest.setRequestHandler("/suggest");
    buildRequest.setParam("suggest.build", "true");
    // Fan the request out to every core; shards.qt routes the sub-requests to /suggest as well.
    buildRequest.setParam(ShardParams.SHARDS, COMMA_JOINER.join(coreUrls));
    buildRequest.setParam(ShardParams.SHARDS_QT, "/suggest");

    final QueryResponse response;
    try {
        response = getSolrClient().query(buildRequest);
    } catch (SolrServerException | IOException e) {
        throw new BusinessException(ErrorCode.data_access_error, e, "Failed to build suggestions");
    }

    final int status = response.getStatus();
    if (status >= 300) {
        throw new BusinessException(ErrorCode.data_access_error,
                MessageFormat.format("Failed to build suggestions: status: {0}", status));
    }
}
/**
 * Collects the core URL of every replica of every slice in the client's default collection.
 *
 * @param solrClient cloud client whose default collection is inspected
 * @return the core URLs of all replicas, in slice iteration order
 * @throws BusinessException if the collection has no slices or is not present in the cluster state
 */
public static List<String> getAllSolrCoreUrls(final CloudSolrClient solrClient) {
    final ZkStateReader zkReader = getZKReader(solrClient);
    final ClusterState clusterState = zkReader.getClusterState();

    final Collection<Slice> slices = clusterState.getSlices(solrClient.getDefaultCollection());
    // getSlices can return null when the collection is unknown to the cluster state; report that
    // with the same error as an empty collection instead of failing with a NullPointerException.
    if (slices == null || slices.isEmpty()) {
        throw new BusinessException(ErrorCode.data_access_error, "No slices");
    }
    return slices.stream()
            .map(Slice::getReplicas)
            .flatMap(Collection::stream)
            .map(replica -> replica.getCoreUrl())
            .collect(Collectors.toList());
}

/**
 * Returns the client's {@link ZkStateReader}, calling {@code connect()} first when the client
 * does not have one yet.
 *
 * @param solrClient cloud client to read the ZooKeeper state reader from
 * @return the state reader reported by the client after the optional connect
 */
private static ZkStateReader getZKReader(final CloudSolrClient solrClient) {
    // The reader is only missing before the client has been used for the first time. The
    // application normally issues a request (such as a health check) during startup, so in
    // most cases the client is already connected.
    if (solrClient.getZkStateReader() == null) {
        solrClient.connect();
    }
    return solrClient.getZkStateReader();
}

Get Suggestions


/**
 * Queries the {@code /suggest} handler for completions of {@code prefix} and converts the
 * entries of the {@code infixSuggester} dictionary into {@link SearchSuggestion}s.
 *
 * @param prefix text typed so far, sent as {@code suggest.q}
 * @param limit maximum number of suggestions to request and to return
 * @return at most {@code limit} suggestions in the order Solr returned them; empty when the
 *         response contains no {@code infixSuggester} entries
 * @throws BusinessException if the query or the response processing fails
 */
public Set<SearchSuggestion> getSuggestions(final String prefix, final int limit) {
   final Set<SearchSuggestion> result = new LinkedHashSet<>(limit);
   try {
       final SolrQuery solrQuery = new SolrQuery().setRequestHandler("/suggest").setParam("suggest.q", prefix)
               .setParam("suggest.count", String.valueOf(limit)).setParam(CommonParams.TIME_ALLOWED,
                       mergedConfig.getConfigByNameAsString("search.suggestions.time_allowed.millSeconds"));
       // context filters
       solrQuery.setParam("suggest.cfq", getContextFilters());
       final QueryResponse queryResponse = getSolrClient().query(solrQuery);
       if (queryResponse != null) {
           final SuggesterResponse suggesterResponse = queryResponse.getSuggesterResponse();
           final Map<String, List<Suggestion>> map = suggesterResponse.getSuggestions();
           final List<Suggestion> infixSuggesters = map.get("infixSuggester");
           if (infixSuggesters != null) {
               for (final Suggestion suggester : infixSuggesters) {
                   if (result.size() >= limit) {
                       break;
                   }
                   // Keep the raw term as the highlighted text and strip the <b> tags for the plain text.
                   // NOTE(review): setText receives the raw term and setHighlightedText the stripped one,
                   // which looks swapped given highlight=true in the suggester config - confirm.
                   result.add(new SearchSuggestion().setText(suggester.getTerm())
                           .setHighlightedText(replaceTagB(suggester.getTerm())));
               }
           }
       }
       // The searched text is the prefix parameter; log it instead of an undeclared "query" name.
       // NOTE(review): "user" is not declared in this method - presumably a field of the class.
       logger.info(
               MessageFormat.format("User: {0}, query: {1}, limit: {2}, result: {3}", user, prefix, limit, result));
       return result;
   } catch (final Exception e) {
       throw new BusinessException(ErrorCode.data_access_error, e, "Failed to get suggestions for " + prefix);
   }
}
/** Matches an opening {@code <b>} or closing {@code </b>} tag. */
private static final Pattern TAGB_PATTERN = Pattern.compile("<b>|</b>");

/**
 * Removes every {@code <b>} and {@code </b>} tag from the given text.
 *
 * @param input text that may contain bold tags; must not be {@code null}
 * @return {@code input} with all bold tags removed
 */
public static String replaceTagB(String input) {
    final String withoutBoldTags = TAGB_PATTERN.matcher(input).replaceAll("");
    return withoutBoldTags;
}

Schema.xml
We define the suggester field (of type textSuggest) and suggesterContextField, copy the fields which are shown in the autocompletion to the suggester field, and copy filter fields such as zipCodes and genres to suggesterContextField.

Solr suggester supports filters on multiple fields; we just need to copy all these filter fields to suggesterContextField.


<!-- Field holding the text offered as suggestions; filled by the copyField rules below. -->
<field name="suggester" type="textSuggest" indexed="true"
  stored="true" multiValued="true" />
<!-- Field holding the values that suggestions can be filtered by; filled by the copyField rules below. -->
<field name="suggesterContextField" type="string" indexed="true" stored="true"
  multiValued="true" />

<!-- Titles copied into the suggestion text field. -->
<copyField source="seriesTitle" dest="suggester" />
<copyField source="programTitle" dest="suggester" />

<!-- Filterable attributes copied into the context field. -->
<copyField source="zipCodes" dest="suggesterContextField" />
<copyField source="genres" dest="suggesterContextField" />
SolrConfig.xml
We can add multiple suggester implementations to the searchComponent. Another very useful one is FileDictionaryFactory, which allows us to use an external file that contains suggest entries. We may use it in the future.


<!-- Suggest component with a single dictionary named "infixSuggester". -->
<searchComponent name="suggest" class="solr.SuggestComponent">
  <lst name="suggester">
    <!-- Dictionary name; the /suggest handler below selects it via suggest.dictionary. -->
    <str name="name">infixSuggester</str>
    <str name="lookupImpl">BlendedInfixLookupFactory</str>
    <str name="dictionaryImpl">DocumentDictionaryFactory</str>
    <str name="blenderType">position_linear</str>
    <!-- Schema field that supplies the suggestion text. -->
    <str name="field">suggester</str>
    <!-- Schema field that supplies the values used for context filtering. -->
    <str name="contextField">suggesterContextField</str>
    <str name="minPrefixChars">4</str>
    <str name="suggestAnalyzerFieldType">textSuggest</str>
    <str name="indexPath">infix_suggestions</str>
    <str name="highlight">true</str>
    <!-- Both automatic build triggers are off; NOTE(review): presumably the suggester is built
         only through an explicit suggest.build=true request - confirm. -->
    <str name="buildOnStartup">false</str>
    <str name="buildOnCommit">false</str>
  </lst>
</searchComponent>

<!-- Request handler that runs only the "suggest" component, with the defaults listed below. -->
<requestHandler name="/suggest" class="solr.SearchHandler"
  >
  <lst name="defaults">
    <str name="suggest">true</str>
    <str name="suggest.dictionary">infixSuggester</str>
    <str name="suggest.onlyMorePopular">true</str>
    <str name="suggest.count">10</str>
    <str name="suggest.collate">true</str>
  </lst>
  <arr name="components">
    <str>suggest</str>
  </arr>
</requestHandler>

Resources
Solr Suggester

Labels

adsense (5) Algorithm (69) Algorithm Series (35) Android (7) ANT (6) bat (8) Big Data (7) Blogger (14) Bugs (6) Cache (5) Chrome (19) Code Example (29) Code Quality (7) Coding Skills (5) Database (7) Debug (16) Design (5) Dev Tips (63) Eclipse (32) Git (5) Google (33) Guava (7) How to (9) Http Client (8) IDE (7) Interview (88) J2EE (13) J2SE (49) Java (186) JavaScript (27) JSON (7) Learning code (9) Lesson Learned (6) Linux (26) Lucene-Solr (112) Mac (10) Maven (8) Network (9) Nutch2 (18) Performance (9) PowerShell (11) Problem Solving (11) Programmer Skills (6) regex (5) Scala (6) Security (9) Soft Skills (38) Spring (22) System Design (11) Testing (7) Text Mining (14) Tips (17) Tools (24) Troubleshooting (29) UIMA (9) Web Development (19) Windows (21) xml (5)