Use of io.georocket.util.XMLParserOperator in project georocket by georocket.
Example from class ImporterVerticle, method importXML.
/**
 * Imports an XML file from the given input stream into the store
 * @param f the XML file to read
 * @param correlationId a unique identifier for this import process
 * @param filename the name of the file currently being imported
 * @param timestamp denotes when the import process has started
 * @param layer the layer where the file should be stored (may be null)
 * @param tags the list of tags to attach to the file (may be null)
 * @param properties the map of properties to attach to the file (may be null)
 * @param fallbackCRSString the CRS which should be used if the imported
 * file does not specify one (may be <code>null</code>)
 * @return a single that will emit when the file has been imported
 */
protected Single<Integer> importXML(ReadStream<Buffer> f, String correlationId, String filename, long timestamp, String layer, List<String> tags, Map<String, Object> properties, String fallbackCRSString) {
UTF8BomFilter bomFilter = new UTF8BomFilter();
XMLCRSIndexer crsIndexer = new XMLCRSIndexer();
AtomicInteger processing = new AtomicInteger(0);
Window window = new Window();
XMLSplitter splitter = new FirstLevelSplitter(window);
return f.toObservable()
    // unwrap the rx-ified buffer and strip a possible UTF-8 BOM in one step
    .map(buf -> bomFilter.filter((io.vertx.core.buffer.Buffer) buf.getDelegate()))
    .doOnNext(window::append)
    .lift(new XMLParserOperator())
    .doOnNext(event -> {
// remember only the first CRS encountered in the file
if (crsIndexer.getCRS() == null) {
crsIndexer.onEvent(event);
}
}).flatMap(splitter::onEventObservable).flatMapSingle(result -> {
// prefer the CRS detected in the file; fall back to the given one otherwise
String crsString = crsIndexer.getCRS() != null ? crsIndexer.getCRS() : fallbackCRSString;
IndexMeta indexMeta = new IndexMeta(correlationId, filename, timestamp, tags, properties, crsString);
return addToStoreWithPause(result, layer, indexMeta, f, processing);
}).count().toSingle();
}
Use of io.georocket.util.XMLParserOperator in project georocket by georocket.
Example from class IndexerVerticle, method openChunkToDocument.
/**
 * Open a chunk and convert it to an Elasticsearch document. Retry operation
 * several times before failing.
 * @param path the path to the chunk to open
 * @param chunkMeta metadata about the chunk
 * @param indexMeta metadata used to index the chunk
 * @return an observable that emits the document
 */
private Observable<Map<String, Object>> openChunkToDocument(String path, ChunkMeta chunkMeta, IndexMeta indexMeta) {
return Observable.defer(() -> store.rxGetOne(path).flatMapObservable(chunk -> {
String mimeType = chunkMeta.getMimeType();
boolean isXml = belongsTo(mimeType, "application", "xml") || belongsTo(mimeType, "text", "xml");
boolean isJson = belongsTo(mimeType, "application", "json");
// select indexers and parser depending on the mime type
List<? extends IndexerFactory> factories;
Operator<? extends StreamEvent, Buffer> parserOperator;
if (isXml) {
factories = xmlIndexerFactories;
parserOperator = new XMLParserOperator();
} else if (isJson) {
factories = jsonIndexerFactories;
parserOperator = new JsonParserOperator();
} else {
return Observable.error(new NoStackTraceThrowable(String.format("Unexpected mime type '%s' while trying to index chunk '%s'", mimeType, path)));
}
// call meta indexers and merge all of their results
Map<String, Object> metaResults = new HashMap<>();
for (MetaIndexerFactory factory : metaIndexerFactories) {
MetaIndexer indexer = factory.createIndexer();
indexer.onIndexChunk(path, chunkMeta, indexMeta);
metaResults.putAll(indexer.getResult());
}
// convert chunk to document and close it
return chunkToDocument(chunk, indexMeta.getFallbackCRSString(), parserOperator, factories).doAfterTerminate(chunk::close).doOnNext(doc -> doc.putAll(metaResults));
})).retryWhen(makeRetry());
}
Aggregations