Search in sources :

Example 1 with JsonParserOperator

use of io.georocket.util.JsonParserOperator in project georocket by georocket.

the class ImporterVerticle method importJSON.

/**
 * Reads a JSON file from the given stream, splits it into chunks and adds
 * every chunk to the store.
 * @param f the stream to read the JSON file from
 * @param correlationId a unique identifier for this import process
 * @param filename the name of the file currently being imported
 * @param timestamp denotes when the import process has started
 * @param layer the layer where the file should be stored (may be null)
 * @param tags the list of tags to attach to the file (may be null)
 * @param properties the map of properties to attach to the file (may be null)
 * @return a single that emits, once the whole stream has been processed,
 * the number of items that were passed to {@code addToStoreWithPause}
 */
protected Single<Integer> importJSON(ReadStream<Buffer> f, String correlationId, String filename, long timestamp, String layer, List<String> tags, Map<String, Object> properties) {
    // state shared by the pipeline stages for the duration of this import
    UTF8BomFilter utf8BomFilter = new UTF8BomFilter();
    StringWindow jsonWindow = new StringWindow();
    GeoJsonSplitter chunkSplitter = new GeoJsonSplitter(jsonWindow);
    AtomicInteger processing = new AtomicInteger(0);

    return f.toObservable()
        // unwrap the Rx buffer to get the core Vert.x buffer it delegates to
        .map(rxBuffer -> (io.vertx.core.buffer.Buffer) rxBuffer.getDelegate())
        .map(utf8BomFilter::filter)
        // the splitter was created on top of this window, so feed it the same bytes
        .doOnNext(jsonWindow::append)
        .lift(new JsonParserOperator())
        .flatMap(chunkSplitter::onEventObservable)
        // a fresh IndexMeta is created for every item, as before
        .flatMapSingle(splitResult -> addToStoreWithPause(
            splitResult,
            layer,
            new IndexMeta(correlationId, filename, timestamp, tags, properties, null),
            f,
            processing))
        .count()
        .toSingle();
}
Also used : Buffer(io.vertx.rxjava.core.buffer.Buffer) IndexMeta(io.georocket.storage.IndexMeta) StringWindow(io.georocket.util.StringWindow) XMLParserOperator(io.georocket.util.XMLParserOperator) ChunkMeta(io.georocket.storage.ChunkMeta) Window(io.georocket.util.Window) LoggerFactory(io.vertx.core.logging.LoggerFactory) RxStore(io.georocket.storage.RxStore) AbstractVerticle(io.vertx.rxjava.core.AbstractVerticle) Single(rx.Single) JsonParserOperator(io.georocket.util.JsonParserOperator) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Result(io.georocket.input.Splitter.Result) Map(java.util.Map) FileSystem(io.vertx.rxjava.core.file.FileSystem) JsonObject(io.vertx.core.json.JsonObject) StoreFactory(io.georocket.storage.StoreFactory) UTF8BomFilter(io.georocket.util.UTF8BomFilter) Logger(io.vertx.core.logging.Logger) Message(io.vertx.rxjava.core.eventbus.Message) XMLSplitter(io.georocket.input.xml.XMLSplitter) OpenOptions(io.vertx.core.file.OpenOptions) GeoJsonSplitter(io.georocket.input.geojson.GeoJsonSplitter) NoStackTraceThrowable(io.vertx.core.impl.NoStackTraceThrowable) FirstLevelSplitter(io.georocket.input.xml.FirstLevelSplitter) Collectors(java.util.stream.Collectors) XMLCRSIndexer(io.georocket.index.xml.XMLCRSIndexer) JsonArray(io.vertx.core.json.JsonArray) List(java.util.List) Stream(java.util.stream.Stream) MimeTypeUtils.belongsTo(io.georocket.util.MimeTypeUtils.belongsTo) ReadStream(io.vertx.rxjava.core.streams.ReadStream) AddressConstants(io.georocket.constants.AddressConstants) RxUtils(io.georocket.util.RxUtils) ConfigConstants(io.georocket.constants.ConfigConstants) Buffer(io.vertx.rxjava.core.buffer.Buffer) StringWindow(io.georocket.util.StringWindow) GeoJsonSplitter(io.georocket.input.geojson.GeoJsonSplitter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) UTF8BomFilter(io.georocket.util.UTF8BomFilter) IndexMeta(io.georocket.storage.IndexMeta) JsonParserOperator(io.georocket.util.JsonParserOperator)

Example 2 with JsonParserOperator

use of io.georocket.util.JsonParserOperator in project georocket by georocket.

the class GeoJsonSplitterTest method split.

/**
 * Loads the given resource, runs it through the JSON parser and a
 * {@link GeoJsonSplitter}, and collects everything the splitter emits.
 * @param file the name of the resource to load (looked up via
 * {@code GeoJsonSplitterTest.class.getResource})
 * @return one tuple per emitted result, holding the chunk metadata and the
 * chunk parsed as a JSON object
 * @throws IOException if the resource could not be read
 */
private List<Tuple2<GeoJsonChunkMeta, JsonObject>> split(String file) throws IOException {
    byte[] contents = IOUtils.toByteArray(GeoJsonSplitterTest.class.getResource(file));
    List<Tuple2<GeoJsonChunkMeta, JsonObject>> collected = new ArrayList<>();
    StringWindow jsonWindow = new StringWindow();
    GeoJsonSplitter geoJsonSplitter = new GeoJsonSplitter(jsonWindow);

    Observable.just(contents)
        .map(Buffer::buffer)
        // the splitter was created on top of this window, so feed it the same bytes
        .doOnNext(jsonWindow::append)
        .lift(new JsonParserOperator())
        .flatMap(geoJsonSplitter::onEventObservable)
        .toBlocking()
        .forEach(splitResult -> {
            JsonObject parsedChunk = new JsonObject(splitResult.getChunk());
            GeoJsonChunkMeta chunkMeta = (GeoJsonChunkMeta) splitResult.getMeta();
            collected.add(Tuple.tuple(chunkMeta, parsedChunk));
        });

    return collected;
}
Also used : StringWindow(io.georocket.util.StringWindow) Tuple2(org.jooq.lambda.tuple.Tuple2) GeoJsonChunkMeta(io.georocket.storage.GeoJsonChunkMeta) ArrayList(java.util.ArrayList) JsonObject(io.vertx.core.json.JsonObject) JsonParserOperator(io.georocket.util.JsonParserOperator)

Example 3 with JsonParserOperator

use of io.georocket.util.JsonParserOperator in project georocket by georocket.

the class IndexerVerticle method openChunkToDocument.

/**
 * Open a chunk and convert it to an Elasticsearch document. Retry operation
 * several times before failing.
 * <p>The parser operator and the indexer factories are selected by the
 * chunk's mime type (XML or JSON). For any other mime type the chunk is
 * closed and the returned observable fails with a
 * {@link NoStackTraceThrowable}.</p>
 * @param path the path to the chunk to open
 * @param chunkMeta metadata about the chunk
 * @param indexMeta metadata used to index the chunk
 * @return an observable that emits the document
 */
private Observable<Map<String, Object>> openChunkToDocument(String path, ChunkMeta chunkMeta, IndexMeta indexMeta) {
    return Observable.defer(() -> store.rxGetOne(path).flatMapObservable(chunk -> {
        List<? extends IndexerFactory> factories;
        Operator<? extends StreamEvent, Buffer> parserOperator;
        // select indexers and parser depending on the mime type
        String mimeType = chunkMeta.getMimeType();
        if (belongsTo(mimeType, "application", "xml") || belongsTo(mimeType, "text", "xml")) {
            factories = xmlIndexerFactories;
            parserOperator = new XMLParserOperator();
        } else if (belongsTo(mimeType, "application", "json")) {
            factories = jsonIndexerFactories;
            parserOperator = new JsonParserOperator();
        } else {
            // The chunk has already been obtained from the store at this point.
            // The doAfterTerminate(chunk::close) further down is only attached on
            // the success path, so close the chunk here to avoid leaking it
            // (the enclosing retryWhen may obtain a new chunk on every attempt).
            chunk.close();
            return Observable.error(new NoStackTraceThrowable(String.format("Unexpected mime type '%s' while trying to index " + "chunk '%s'", mimeType, path)));
        }
        // call meta indexers
        Map<String, Object> metaResults = new HashMap<>();
        for (MetaIndexerFactory metaIndexerFactory : metaIndexerFactories) {
            MetaIndexer metaIndexer = metaIndexerFactory.createIndexer();
            metaIndexer.onIndexChunk(path, chunkMeta, indexMeta);
            metaResults.putAll(metaIndexer.getResult());
        }
        // convert chunk to document and close it; the meta indexer results are
        // merged into every emitted document
        return chunkToDocument(chunk, indexMeta.getFallbackCRSString(), parserOperator, factories).doAfterTerminate(chunk::close).doOnNext(doc -> doc.putAll(metaResults));
    })).retryWhen(makeRetry());
}
Also used : Buffer(io.vertx.core.buffer.Buffer) XMLParserOperator(io.georocket.util.XMLParserOperator) MetaIndexerFactory(io.georocket.index.xml.MetaIndexerFactory) DefaultMetaIndexerFactory(io.georocket.index.generic.DefaultMetaIndexerFactory) HashMap(java.util.HashMap) JsonObject(io.vertx.core.json.JsonObject) MetaIndexer(io.georocket.index.xml.MetaIndexer) JsonParserOperator(io.georocket.util.JsonParserOperator) NoStackTraceThrowable(io.vertx.core.impl.NoStackTraceThrowable)

Example 4 with JsonParserOperator

use of io.georocket.util.JsonParserOperator in project georocket by georocket.

the class GeoJsonGenericAttributeIndexerTest method assertIndexed.

/**
 * Runs the given JSON file through a {@link GeoJsonGenericAttributeIndexer}
 * and asserts that the indexer's result equals the expected properties
 * wrapped in a map under the key {@code genAttrs}.
 * @param expected the expected properties map
 * @param jsonFile the JSON file to parse
 * @param context the current test context
 * @throws IOException if the JSON file could not be read
 */
private void assertIndexed(Map<String, Object> expected, String jsonFile, TestContext context) throws IOException {
    String contents;
    try (InputStream resource = getClass().getResourceAsStream(jsonFile);
        Scanner wholeFile = new Scanner(resource, "UTF-8")) {
        // "\\A" only matches at the beginning of the input, so the first
        // token is the complete file
        contents = wholeFile.useDelimiter("\\A").next();
    }

    GeoJsonGenericAttributeIndexer attributeIndexer = new GeoJsonGenericAttributeIndexer();
    Map<String, Object> wanted = ImmutableMap.of("genAttrs", expected);
    Async done = context.async();

    Observable.just(Buffer.buffer(contents))
        .lift(new JsonParserOperator())
        .doOnNext(attributeIndexer::onEvent)
        // compare only after the last parser event has been fed to the indexer
        .last()
        .subscribe(lastEvent -> {
            context.assertEquals(wanted, attributeIndexer.getResult());
            done.complete();
        }, context::fail);
}
Also used : Scanner(java.util.Scanner) InputStream(java.io.InputStream) Async(io.vertx.ext.unit.Async) JsonParserOperator(io.georocket.util.JsonParserOperator)

Example 5 with JsonParserOperator

use of io.georocket.util.JsonParserOperator in project georocket by georocket.

the class GeoJsonBoundingBoxIndexerTest method assertIndexed.

/**
 * Runs the given JSON file through a {@link GeoJsonBoundingBoxIndexer} and
 * asserts that the indexer's result is a {@code bbox} entry of type
 * {@code envelope} whose coordinates equal the expected bounding box.
 * @param expected the expected bounding box
 * @param jsonFile the JSON file to parse
 * @param context the current test context
 * @throws IOException if the JSON file could not be read
 */
private void assertIndexed(List<List<Double>> expected, String jsonFile, TestContext context) throws IOException {
    String contents;
    try (InputStream resource = getClass().getResourceAsStream(jsonFile);
        Scanner wholeFile = new Scanner(resource, "UTF-8")) {
        // "\\A" only matches at the beginning of the input, so the first
        // token is the complete file
        contents = wholeFile.useDelimiter("\\A").next();
    }

    GeoJsonBoundingBoxIndexer bboxIndexer = new GeoJsonBoundingBoxIndexer();
    ImmutableMap<String, Object> envelope = ImmutableMap.of("type", "envelope", "coordinates", expected);
    Map<String, Object> wanted = ImmutableMap.of("bbox", envelope);
    Async done = context.async();

    Observable.just(Buffer.buffer(contents))
        .lift(new JsonParserOperator())
        .doOnNext(bboxIndexer::onEvent)
        // compare only after the last parser event has been fed to the indexer
        .last()
        .subscribe(lastEvent -> {
            context.assertEquals(wanted, bboxIndexer.getResult());
            done.complete();
        }, context::fail);
}
Also used : Scanner(java.util.Scanner) InputStream(java.io.InputStream) Async(io.vertx.ext.unit.Async) JsonParserOperator(io.georocket.util.JsonParserOperator)

Aggregations

JsonParserOperator (io.georocket.util.JsonParserOperator): 5, JsonObject (io.vertx.core.json.JsonObject): 3, StringWindow (io.georocket.util.StringWindow): 2, XMLParserOperator (io.georocket.util.XMLParserOperator): 2, NoStackTraceThrowable (io.vertx.core.impl.NoStackTraceThrowable): 2, Async (io.vertx.ext.unit.Async): 2, InputStream (java.io.InputStream): 2, Scanner (java.util.Scanner): 2, AddressConstants (io.georocket.constants.AddressConstants): 1, ConfigConstants (io.georocket.constants.ConfigConstants): 1, DefaultMetaIndexerFactory (io.georocket.index.generic.DefaultMetaIndexerFactory): 1, MetaIndexer (io.georocket.index.xml.MetaIndexer): 1, MetaIndexerFactory (io.georocket.index.xml.MetaIndexerFactory): 1, XMLCRSIndexer (io.georocket.index.xml.XMLCRSIndexer): 1, Result (io.georocket.input.Splitter.Result): 1, GeoJsonSplitter (io.georocket.input.geojson.GeoJsonSplitter): 1, FirstLevelSplitter (io.georocket.input.xml.FirstLevelSplitter): 1, XMLSplitter (io.georocket.input.xml.XMLSplitter): 1, ChunkMeta (io.georocket.storage.ChunkMeta): 1, GeoJsonChunkMeta (io.georocket.storage.GeoJsonChunkMeta): 1