Search in sources :

Example 1 with MetaIndexerFactory

use of io.georocket.index.xml.MetaIndexerFactory in project georocket by georocket.

The method openChunkToDocument of the class IndexerVerticle.

/**
 * Open a chunk and convert it to an Elasticsearch document. Retry operation
 * several times before failing.
 *
 * <p>The parser operator and the indexer factories are selected from the
 * chunk's mime type (XML or JSON). For any other mime type the already opened
 * chunk is closed and the observable fails with a
 * {@link NoStackTraceThrowable}; this error passes through the same retry
 * policy as any other failure. The results of all meta indexers are merged
 * into every emitted document.</p>
 *
 * @param path the path to the chunk to open
 * @param chunkMeta metadata about the chunk
 * @param indexMeta metadata used to index the chunk
 * @return an observable that emits the document
 */
private Observable<Map<String, Object>> openChunkToDocument(String path, ChunkMeta chunkMeta, IndexMeta indexMeta) {
    // defer so that every retry re-opens the chunk from the store
    return Observable.defer(() -> store.rxGetOne(path).flatMapObservable(chunk -> {
        List<? extends IndexerFactory> factories;
        Operator<? extends StreamEvent, Buffer> parserOperator;
        // select indexers and parser depending on the mime type
        String mimeType = chunkMeta.getMimeType();
        if (belongsTo(mimeType, "application", "xml") || belongsTo(mimeType, "text", "xml")) {
            factories = xmlIndexerFactories;
            parserOperator = new XMLParserOperator();
        } else if (belongsTo(mimeType, "application", "json")) {
            factories = jsonIndexerFactories;
            parserOperator = new JsonParserOperator();
        } else {
            // The chunk has already been opened at this point and the close
            // hook below is never attached on this path. Release it here,
            // otherwise every (retried) attempt would leak an open chunk.
            chunk.close();
            return Observable.error(new NoStackTraceThrowable(String.format("Unexpected mime type '%s' while trying to index chunk '%s'", mimeType, path)));
        }
        // call meta indexers
        Map<String, Object> metaResults = new HashMap<>();
        for (MetaIndexerFactory metaIndexerFactory : metaIndexerFactories) {
            MetaIndexer metaIndexer = metaIndexerFactory.createIndexer();
            metaIndexer.onIndexChunk(path, chunkMeta, indexMeta);
            metaResults.putAll(metaIndexer.getResult());
        }
        // convert chunk to document and close it once the conversion has
        // terminated; then enrich each document with the meta indexer results
        return chunkToDocument(chunk, indexMeta.getFallbackCRSString(), parserOperator, factories).doAfterTerminate(chunk::close).doOnNext(doc -> doc.putAll(metaResults));
    })).retryWhen(makeRetry());
}
Also used : Buffer(io.vertx.core.buffer.Buffer) XMLParserOperator(io.georocket.util.XMLParserOperator) MetaIndexerFactory(io.georocket.index.xml.MetaIndexerFactory) DefaultMetaIndexerFactory(io.georocket.index.generic.DefaultMetaIndexerFactory) HashMap(java.util.HashMap) JsonObject(io.vertx.core.json.JsonObject) MetaIndexer(io.georocket.index.xml.MetaIndexer) JsonParserOperator(io.georocket.util.JsonParserOperator) NoStackTraceThrowable(io.vertx.core.impl.NoStackTraceThrowable)

Example 2 with MetaIndexerFactory

use of io.georocket.index.xml.MetaIndexerFactory in project georocket by georocket.

The method start of the class IndexerVerticle.

/**
 * Start the verticle: read the configuration, load all indexer factories,
 * create the store and the query compiler, and connect to Elasticsearch.
 * Message consumers are registered and {@code startFuture} is completed only
 * after the index and its mapping have been ensured; any error along the way
 * fails {@code startFuture}.
 *
 * @param startFuture the future to complete or fail once startup has finished
 */
@Override
public void start(Future<Void> startFuture) {
    JsonObject conf = config();

    // Whether the indexer and other verticles should report their activities
    // to the Vert.x event bus (mostly useful for GeoRocket plug-ins)
    reportActivities = conf.getBoolean(ConfigConstants.REPORT_ACTIVITIES, false);
    maxBulkSize = conf.getInteger(
        ConfigConstants.INDEX_MAX_BULK_SIZE,
        ConfigConstants.DEFAULT_INDEX_MAX_BULK_SIZE);
    maxParallelInserts = conf.getInteger(
        ConfigConstants.INDEX_MAX_PARALLEL_INSERTS,
        ConfigConstants.DEFAULT_INDEX_MAX_PARALLEL_INSERTS);

    // Load and copy all indexer factories eagerly (not lazily) to avoid
    // concurrent modifications to the service loader's internal cache
    indexerFactories = ImmutableList.copyOf(ServiceLoader.load(IndexerFactory.class));
    xmlIndexerFactories = ImmutableList.copyOf(
        Seq.seq(indexerFactories)
            .filter(XMLIndexerFactory.class::isInstance)
            .cast(XMLIndexerFactory.class));
    jsonIndexerFactories = ImmutableList.copyOf(
        Seq.seq(indexerFactories)
            .filter(JsonIndexerFactory.class::isInstance)
            .cast(JsonIndexerFactory.class));
    metaIndexerFactories = ImmutableList.copyOf(
        Seq.seq(indexerFactories)
            .filter(MetaIndexerFactory.class::isInstance)
            .cast(MetaIndexerFactory.class));

    store = new RxStore(StoreFactory.createStore(getVertx()));

    queryCompiler = createQueryCompiler();
    queryCompiler.setQueryCompilers(indexerFactories);

    // Connect to Elasticsearch, remember the client, then make sure the index
    // and the mapping exist before accepting any messages
    new ElasticsearchClientFactory(vertx)
        .createElasticsearchClient(INDEX_NAME)
        .doOnNext(esClient -> client = esClient)
        .flatMap(ignored -> client.ensureIndex())
        .flatMap(ignored -> ensureMapping())
        .subscribe(
            ignored -> {
                registerMessageConsumers();
                startFuture.complete();
            },
            cause -> startFuture.fail(cause));
}
Also used : MetaIndexer(io.georocket.index.xml.MetaIndexer) GeoJsonChunkMeta(io.georocket.storage.GeoJsonChunkMeta) IndexMeta(io.georocket.storage.IndexMeta) StreamEvent(io.georocket.util.StreamEvent) XMLChunkMeta(io.georocket.storage.XMLChunkMeta) XMLParserOperator(io.georocket.util.XMLParserOperator) ChunkMeta(io.georocket.storage.ChunkMeta) RxStore(io.georocket.storage.RxStore) Tuple2(org.jooq.lambda.tuple.Tuple2) Tuple3(org.jooq.lambda.tuple.Tuple3) JsonParserOperator(io.georocket.util.JsonParserOperator) Map(java.util.Map) JsonObject(io.vertx.core.json.JsonObject) Logger(io.vertx.core.logging.Logger) MetaIndexerFactory(io.georocket.index.xml.MetaIndexerFactory) Message(io.vertx.rxjava.core.eventbus.Message) ServiceLoader(java.util.ServiceLoader) Collectors(java.util.stream.Collectors) Future(io.vertx.core.Future) List(java.util.List) ElasticsearchClientFactory(io.georocket.index.elasticsearch.ElasticsearchClientFactory) Stream(java.util.stream.Stream) Tuple(org.jooq.lambda.tuple.Tuple) MapUtils(io.georocket.util.MapUtils) Buffer(io.vertx.core.buffer.Buffer) MimeTypeUtils.belongsTo(io.georocket.util.MimeTypeUtils.belongsTo) RxHelper(io.vertx.rx.java.RxHelper) AddressConstants(io.georocket.constants.AddressConstants) ChunkReadStream(io.georocket.storage.ChunkReadStream) Operator(rx.Observable.Operator) HashMap(java.util.HashMap) Seq(org.jooq.lambda.Seq) DefaultQueryCompiler(io.georocket.query.DefaultQueryCompiler) LoggerFactory(io.vertx.core.logging.LoggerFactory) ArrayList(java.util.ArrayList) AbstractVerticle(io.vertx.rxjava.core.AbstractVerticle) Observable(rx.Observable) Func1(rx.functions.Func1) ImmutableList(com.google.common.collect.ImmutableList) XMLIndexerFactory(io.georocket.index.xml.XMLIndexerFactory) StoreFactory(io.georocket.storage.StoreFactory) JsonIndexerFactory(io.georocket.index.xml.JsonIndexerFactory) NoStackTraceThrowable(io.vertx.core.impl.NoStackTraceThrowable) 
ThrowableHelper.throwableToMessage(io.georocket.util.ThrowableHelper.throwableToMessage) StreamIndexer(io.georocket.index.xml.StreamIndexer) TimeUnit(java.util.concurrent.TimeUnit) JsonArray(io.vertx.core.json.JsonArray) ThrowableHelper.throwableToCode(io.georocket.util.ThrowableHelper.throwableToCode) ElasticsearchClient(io.georocket.index.elasticsearch.ElasticsearchClient) RxUtils(io.georocket.util.RxUtils) DefaultMetaIndexerFactory(io.georocket.index.generic.DefaultMetaIndexerFactory) ConfigConstants(io.georocket.constants.ConfigConstants) JsonChunkMeta(io.georocket.storage.JsonChunkMeta) ElasticsearchClientFactory(io.georocket.index.elasticsearch.ElasticsearchClientFactory) JsonIndexerFactory(io.georocket.index.xml.JsonIndexerFactory) MetaIndexerFactory(io.georocket.index.xml.MetaIndexerFactory) DefaultMetaIndexerFactory(io.georocket.index.generic.DefaultMetaIndexerFactory) RxStore(io.georocket.storage.RxStore) XMLIndexerFactory(io.georocket.index.xml.XMLIndexerFactory)

Aggregations

DefaultMetaIndexerFactory (io.georocket.index.generic.DefaultMetaIndexerFactory)2 MetaIndexer (io.georocket.index.xml.MetaIndexer)2 MetaIndexerFactory (io.georocket.index.xml.MetaIndexerFactory)2 JsonParserOperator (io.georocket.util.JsonParserOperator)2 XMLParserOperator (io.georocket.util.XMLParserOperator)2 Buffer (io.vertx.core.buffer.Buffer)2 NoStackTraceThrowable (io.vertx.core.impl.NoStackTraceThrowable)2 JsonObject (io.vertx.core.json.JsonObject)2 HashMap (java.util.HashMap)2 ImmutableList (com.google.common.collect.ImmutableList)1 AddressConstants (io.georocket.constants.AddressConstants)1 ConfigConstants (io.georocket.constants.ConfigConstants)1 ElasticsearchClient (io.georocket.index.elasticsearch.ElasticsearchClient)1 ElasticsearchClientFactory (io.georocket.index.elasticsearch.ElasticsearchClientFactory)1 JsonIndexerFactory (io.georocket.index.xml.JsonIndexerFactory)1 StreamIndexer (io.georocket.index.xml.StreamIndexer)1 XMLIndexerFactory (io.georocket.index.xml.XMLIndexerFactory)1 DefaultQueryCompiler (io.georocket.query.DefaultQueryCompiler)1 ChunkMeta (io.georocket.storage.ChunkMeta)1 ChunkReadStream (io.georocket.storage.ChunkReadStream)1