My Solr client talks to a proxy application, which in turn talks to a remote Solr server to get data.
In the previous post, Solr: Use JSON(GSon) Streaming to Reduce Memory Usage,
I described the problem we faced, how to use JSON (Gson) streaming, and some other approaches to reduce memory usage. In this post I will use an XML SAX parser to iterate over the XML response stream. In the next post I will introduce how to use the StAX parser to parse the XML response.
Implementation
The code below uses SAX to read documents one by one from the HTTP stream:
-- Use a SAX parser and Java Executors Futures to wait until all threads have finished, i.e. until all docs are imported.
Resources
Parsing XML using DOM, SAX and StAX Parser in Java
Java SAX vs. StAX
In the previous post, Solr: Use JSON(GSon) Streaming to Reduce Memory Usage,
I described the problem we faced, how to use JSON (Gson) streaming, and some other approaches to reduce memory usage. In this post I will use an XML SAX parser to iterate over the XML response stream. In the next post I will introduce how to use the StAX parser to parse the XML response.
Implementation
The code below uses SAX to read documents one by one from the HTTP stream:
-- Use a SAX parser and Java Executors Futures to wait until all threads have finished, i.e. until all docs are imported.
/**
 * Parses an XML response stream with a SAX parser driven by a
 * {@link SolrResponseHandler}, waits for every import task the handler
 * collected, and summarizes the outcome.
 *
 * <p>The returned result carries the number of {@code doc} elements seen, a
 * "has more" flag computed as {@code fetchedSize + start < numFound}, and an
 * "imported data" flag that is true when at least one document was seen.
 *
 * @param request   request passed through to the handler
 * @param in        stream containing the XML response; not closed here
 * @param fetchSize currently unused; kept so existing callers keep compiling
 * @return summary of what was fetched
 * @throws IOException if reading the stream fails, if an import task fails
 *         (the task's failure is attached as the cause), or if the calling
 *         thread is interrupted while waiting for the import tasks
 * @throws ParserConfigurationException if no SAX parser can be created
 * @throws SAXException if the XML cannot be parsed
 */
private static ImportedResult handleXMLResponseViaSax(
        SolrQueryRequest request, InputStream in, int fetchSize)
        throws IOException, ParserConfigurationException, SAXException {
    SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    SAXParser parser = parserFactory.newSAXParser();
    SolrResponseHandler handler = new SolrResponseHandler(request);
    parser.parse(in, handler);

    // Block until every submitted import has finished, so the result is
    // only reported once the work is done and failures are not lost.
    for (Future<Void> future : handler.futures) {
        if (future == null) {
            continue;
        }
        try {
            future.get();
        } catch (InterruptedException e) {
            // Restore the interrupt flag for callers further up the stack.
            Thread.currentThread().interrupt();
            java.io.InterruptedIOException interrupted = new java.io.InterruptedIOException(
                    "Interrupted while waiting for document imports to finish");
            interrupted.initCause(e);
            throw interrupted;
        } catch (java.util.concurrent.ExecutionException e) {
            Throwable cause = (e.getCause() != null) ? e.getCause() : e;
            throw new IOException("Document import failed", cause);
        }
    }

    ImportedResult importedResult = new ImportedResult();
    importedResult.setFetched(handler.fetchedSize);
    importedResult.setHasMore(
            (handler.fetchedSize + handler.start) < handler.numFound);
    importedResult.setImportedData(handler.fetchedSize != 0);
    return importedResult;
}
/**
 * SAX handler that walks an XML response and submits one import task per
 * {@code doc} element.
 *
 * <p>It reads {@code numFound} and {@code start} from the {@code result}
 * element, captures the text of {@code <str name="contentid">} and
 * {@code <str name="bindoc">}, and on every closing {@code doc} tag calls
 * {@code CVSyncDataImporter.getInstance().importData(...)} with the captured
 * values, keeping the returned future in {@link #futures}.
 */
private static class SolrResponseHandler extends DefaultHandler {
    /** Number of {@code doc} elements closed so far. */
    protected int fetchedSize = 0;
    /** Values of the {@code result} element's attributes; -1 until seen. */
    protected int numFound = -1, start = -1;
    /** Field values captured for the document currently being read. */
    protected String contentid, bindoc = null;
    /** One future per submitted import, in document order. */
    protected List<Future<Void>> futures = new ArrayList<Future<Void>>();
    /** Name and (trimmed) text of the tracked {@code str} being read. */
    String curName, curValue;
    private SolrQueryRequest request;
    /** Accumulates text chunks of the tracked {@code str} element. */
    private final StringBuilder text = new StringBuilder();

    public SolrResponseHandler(SolrQueryRequest request) {
        this.request = request;
    }

    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        switch (qName) {
        case "result": {
            numFound = intAttribute(attributes, "numFound");
            start = intAttribute(attributes, "start");
            break;
        }
        case "str": {
            String name = attributes.getValue("name");
            // Track only the two fields we import. Anything else clears
            // curName so its text cannot leak into contentid/bindoc.
            if ("contentid".equals(name) || "bindoc".equals(name)) {
                curName = name;
            } else {
                curName = null;
            }
            curValue = null;
            text.setLength(0);
            break;
        }
        default:
            break;
        }
    }

    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        switch (qName) {
        case "str": {
            if (curName != null) {
                curValue = text.toString().trim();
                if ("contentid".equals(curName)) {
                    contentid = curValue;
                } else {
                    bindoc = curValue;
                }
                // Stop capturing until the next tracked <str> opens.
                curName = null;
                text.setLength(0);
            }
            break;
        }
        case "doc": {
            ++fetchedSize;
            futures.add(CVSyncDataImporter.getInstance().importData(request,
                    contentid, bindoc));
            // Do not let this document's values carry over to the next one.
            contentid = null;
            bindoc = null;
            break;
        }
        default:
            break;
        }
    }

    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        // A parser may deliver one text node in several chunks, so append
        // rather than overwrite; trimming happens once the element closes.
        if (curName != null) {
            text.append(ch, start, length);
        }
    }

    /**
     * Reads a required integer attribute, reporting a missing or
     * non-numeric value as a {@link SAXException}.
     */
    private static int intAttribute(Attributes attributes, String name)
            throws SAXException {
        String raw = attributes.getValue(name);
        if (raw == null) {
            throw new SAXException(
                    "Missing attribute '" + name + "' on <result>");
        }
        try {
            return Integer.parseInt(raw.trim());
        } catch (NumberFormatException e) {
            throw new SAXException("Attribute '" + name
                    + "' on <result> is not an integer: " + raw, e);
        }
    }
}
Parsing XML using DOM, SAX and StAX Parser in Java
Java SAX vs. StAX