Search in sources:

Example 1 with Window

Use of io.georocket.util.Window in the project georocket by georocket.

Found in the class ImporterVerticle, method importJSON.

/**
 * Reads a JSON file from the given stream, splits it into chunks and adds
 * each chunk to the store.
 * @param f the stream providing the contents of the JSON file
 * @param correlationId an identifier that is unique to this import process
 * @param filename the name of the file that is being imported
 * @param timestamp the point in time at which the import process started
 * @param layer the layer the chunks should be stored in (may be null)
 * @param tags the tags to attach to the imported data (may be null)
 * @param properties the properties to attach to the imported data (may be null)
 * @return a single that emits the number of items that passed through the
 * import pipeline once the whole stream has been processed
 */
protected Single<Integer> importJSON(ReadStream<Buffer> f, String correlationId, String filename, long timestamp, String layer, List<String> tags, Map<String, Object> properties) {
    AtomicInteger processing = new AtomicInteger(0);
    UTF8BomFilter bomFilter = new UTF8BomFilter();

    // the splitter reads from the same window the raw input is appended to
    StringWindow window = new StringWindow();
    GeoJsonSplitter splitter = new GeoJsonSplitter(window);

    return f.toObservable()
        // unwrap the rx-ified buffer to get the core Vert.x buffer
        .map(wrapped -> (io.vertx.core.buffer.Buffer) wrapped.getDelegate())
        .map(raw -> bomFilter.filter(raw))
        .doOnNext(filtered -> window.append(filtered))
        .lift(new JsonParserOperator())
        .flatMap(event -> splitter.onEventObservable(event))
        .flatMapSingle(chunk -> {
            // JSON files carry no CRS information, hence the trailing null
            IndexMeta meta = new IndexMeta(correlationId, filename, timestamp, tags, properties, null);
            return addToStoreWithPause(chunk, layer, meta, f, processing);
        })
        .count()
        .toSingle();
}
Also used : Buffer(io.vertx.rxjava.core.buffer.Buffer) IndexMeta(io.georocket.storage.IndexMeta) StringWindow(io.georocket.util.StringWindow) XMLParserOperator(io.georocket.util.XMLParserOperator) ChunkMeta(io.georocket.storage.ChunkMeta) Window(io.georocket.util.Window) LoggerFactory(io.vertx.core.logging.LoggerFactory) RxStore(io.georocket.storage.RxStore) AbstractVerticle(io.vertx.rxjava.core.AbstractVerticle) Single(rx.Single) JsonParserOperator(io.georocket.util.JsonParserOperator) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Result(io.georocket.input.Splitter.Result) Map(java.util.Map) FileSystem(io.vertx.rxjava.core.file.FileSystem) JsonObject(io.vertx.core.json.JsonObject) StoreFactory(io.georocket.storage.StoreFactory) UTF8BomFilter(io.georocket.util.UTF8BomFilter) Logger(io.vertx.core.logging.Logger) Message(io.vertx.rxjava.core.eventbus.Message) XMLSplitter(io.georocket.input.xml.XMLSplitter) OpenOptions(io.vertx.core.file.OpenOptions) GeoJsonSplitter(io.georocket.input.geojson.GeoJsonSplitter) NoStackTraceThrowable(io.vertx.core.impl.NoStackTraceThrowable) FirstLevelSplitter(io.georocket.input.xml.FirstLevelSplitter) Collectors(java.util.stream.Collectors) XMLCRSIndexer(io.georocket.index.xml.XMLCRSIndexer) JsonArray(io.vertx.core.json.JsonArray) List(java.util.List) Stream(java.util.stream.Stream) MimeTypeUtils.belongsTo(io.georocket.util.MimeTypeUtils.belongsTo) ReadStream(io.vertx.rxjava.core.streams.ReadStream) AddressConstants(io.georocket.constants.AddressConstants) RxUtils(io.georocket.util.RxUtils) ConfigConstants(io.georocket.constants.ConfigConstants) Buffer(io.vertx.rxjava.core.buffer.Buffer) StringWindow(io.georocket.util.StringWindow) GeoJsonSplitter(io.georocket.input.geojson.GeoJsonSplitter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) UTF8BomFilter(io.georocket.util.UTF8BomFilter) IndexMeta(io.georocket.storage.IndexMeta) JsonParserOperator(io.georocket.util.JsonParserOperator)

Example 2 with Window

Use of io.georocket.util.Window in the project georocket by georocket.

Found in the class ImporterVerticle, method importXML.

/**
 * Reads an XML file from the given stream, splits it into chunks and adds
 * each chunk to the store.
 * @param f the stream providing the contents of the XML file
 * @param correlationId an identifier that is unique to this import process
 * @param filename the name of the file that is being imported
 * @param timestamp the point in time at which the import process started
 * @param layer the layer the chunks should be stored in (may be null)
 * @param tags the tags to attach to the imported data (may be null)
 * @param properties the properties to attach to the imported data (may be null)
 * @param fallbackCRSString the CRS to use when none could be found in the
 * imported file (may be <code>null</code>)
 * @return a single that emits the number of items that passed through the
 * import pipeline once the whole stream has been processed
 */
protected Single<Integer> importXML(ReadStream<Buffer> f, String correlationId, String filename, long timestamp, String layer, List<String> tags, Map<String, Object> properties, String fallbackCRSString) {
    AtomicInteger processing = new AtomicInteger(0);
    UTF8BomFilter bomFilter = new UTF8BomFilter();
    XMLCRSIndexer crsIndexer = new XMLCRSIndexer();

    // the splitter reads from the same window the raw input is appended to
    Window window = new Window();
    XMLSplitter splitter = new FirstLevelSplitter(window);

    return f.toObservable()
        // unwrap the rx-ified buffer to get the core Vert.x buffer
        .map(wrapped -> (io.vertx.core.buffer.Buffer) wrapped.getDelegate())
        .map(raw -> bomFilter.filter(raw))
        .doOnNext(filtered -> window.append(filtered))
        .lift(new XMLParserOperator())
        .doOnNext(event -> {
            // only feed the indexer until it has found the first CRS
            if (crsIndexer.getCRS() == null) {
                crsIndexer.onEvent(event);
            }
        })
        .flatMap(event -> splitter.onEventObservable(event))
        .flatMapSingle(chunk -> {
            // a CRS found in the file takes precedence over the fallback
            String crs = crsIndexer.getCRS() != null ? crsIndexer.getCRS() : fallbackCRSString;
            IndexMeta meta = new IndexMeta(correlationId, filename, timestamp, tags, properties, crs);
            return addToStoreWithPause(chunk, layer, meta, f, processing);
        })
        .count()
        .toSingle();
}
Also used : StringWindow(io.georocket.util.StringWindow) Window(io.georocket.util.Window) XMLParserOperator(io.georocket.util.XMLParserOperator) XMLCRSIndexer(io.georocket.index.xml.XMLCRSIndexer) FirstLevelSplitter(io.georocket.input.xml.FirstLevelSplitter) XMLSplitter(io.georocket.input.xml.XMLSplitter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) UTF8BomFilter(io.georocket.util.UTF8BomFilter) IndexMeta(io.georocket.storage.IndexMeta)

Example 3 with Window

Use of io.georocket.util.Window in the project georocket by georocket.

Found in the class FirstLevelSplitterTest, method split.

/**
 * Splits the given XML string with a {@link FirstLevelSplitter}
 * @param xml the XML string to split
 * @return all chunks the splitter produced, in the order they were created
 * @throws Exception if parsing the XML string failed
 */
private List<Result<XMLChunkMeta>> split(String xml) throws Exception {
    // the splitter reads from a window holding the complete input
    Window input = new Window();
    input.append(Buffer.buffer(xml));
    FirstLevelSplitter splitter = new FirstLevelSplitter(input);

    // feed the complete document to the asynchronous parser in one go
    AsyncXMLInputFactory factory = new InputFactoryImpl();
    AsyncXMLStreamReader<AsyncByteArrayFeeder> parser = factory.createAsyncForByteArray();
    byte[] bytes = xml.getBytes(StandardCharsets.UTF_8);
    parser.getInputFeeder().feedInput(bytes, 0, bytes.length);

    List<Result<XMLChunkMeta>> result = new ArrayList<>();
    while (parser.hasNext()) {
        int type = parser.next();
        if (type != AsyncXMLStreamReader.EVENT_INCOMPLETE) {
            int offset = parser.getLocation().getCharacterOffset();
            Result<XMLChunkMeta> produced = splitter.onEvent(new XMLStreamEvent(type, offset, parser));
            if (produced != null) {
                result.add(produced);
            }
        } else {
            // all input has been fed already, so there is nothing more to
            // parse; presumably closing the reader makes hasNext() return
            // false and ends the loop
            parser.close();
        }
    }
    return result;
}
Also used : Window(io.georocket.util.Window) XMLStreamEvent(io.georocket.util.XMLStreamEvent) ArrayList(java.util.ArrayList) AsyncByteArrayFeeder(com.fasterxml.aalto.AsyncByteArrayFeeder) InputFactoryImpl(com.fasterxml.aalto.stax.InputFactoryImpl) Result(io.georocket.input.Splitter.Result) XMLChunkMeta(io.georocket.storage.XMLChunkMeta) AsyncXMLInputFactory(com.fasterxml.aalto.AsyncXMLInputFactory)

Aggregations

Window (io.georocket.util.Window) 3, XMLCRSIndexer (io.georocket.index.xml.XMLCRSIndexer) 2, Result (io.georocket.input.Splitter.Result) 2, FirstLevelSplitter (io.georocket.input.xml.FirstLevelSplitter) 2, XMLSplitter (io.georocket.input.xml.XMLSplitter) 2, IndexMeta (io.georocket.storage.IndexMeta) 2, StringWindow (io.georocket.util.StringWindow) 2, UTF8BomFilter (io.georocket.util.UTF8BomFilter) 2, XMLParserOperator (io.georocket.util.XMLParserOperator) 2, AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 2, AsyncByteArrayFeeder (com.fasterxml.aalto.AsyncByteArrayFeeder) 1, AsyncXMLInputFactory (com.fasterxml.aalto.AsyncXMLInputFactory) 1, InputFactoryImpl (com.fasterxml.aalto.stax.InputFactoryImpl) 1, AddressConstants (io.georocket.constants.AddressConstants) 1, ConfigConstants (io.georocket.constants.ConfigConstants) 1, GeoJsonSplitter (io.georocket.input.geojson.GeoJsonSplitter) 1, ChunkMeta (io.georocket.storage.ChunkMeta) 1, RxStore (io.georocket.storage.RxStore) 1, StoreFactory (io.georocket.storage.StoreFactory) 1, XMLChunkMeta (io.georocket.storage.XMLChunkMeta) 1