Search in sources :

Example 1 with Result

use of io.georocket.input.Splitter.Result in project georocket by georocket.

the class ImporterVerticle method importJSON.

/**
 * Streams a JSON file through a {@link GeoJsonSplitter} and passes every
 * result the splitter produces to {@code addToStoreWithPause}.
 * @param f the stream delivering the contents of the JSON file
 * @param correlationId a unique identifier for this import process
 * @param filename the name of the file currently being imported
 * @param timestamp denotes when the import process has started
 * @param layer the layer where the file should be stored (may be null)
 * @param tags the list of tags to attach to the file (may be null)
 * @param properties the map of properties to attach to the file (may be null)
 * @return a single that emits the number of items that went through the
 * pipeline once the whole stream has been processed
 */
protected Single<Integer> importJSON(ReadStream<Buffer> f, String correlationId, String filename, long timestamp, String layer, List<String> tags, Map<String, Object> properties) {
    // Counter handed to addToStoreWithPause together with the stream itself;
    // presumably used there to track in-flight chunks -- confirm in that method.
    final AtomicInteger processing = new AtomicInteger(0);
    final UTF8BomFilter bomFilter = new UTF8BomFilter();
    // The window receives every buffer (see doOnNext below) and is shared
    // with the splitter.
    final StringWindow window = new StringWindow();
    final GeoJsonSplitter splitter = new GeoJsonSplitter(window);

    return f.toObservable()
        // unwrap the Rx buffer to the underlying core Vert.x buffer
        .map(buf -> (io.vertx.core.buffer.Buffer) buf.getDelegate())
        .map(bomFilter::filter)
        .doOnNext(window::append)
        .lift(new JsonParserOperator())
        .flatMap(splitter::onEventObservable)
        // a fresh IndexMeta is created for every single result
        .flatMapSingle(result -> addToStoreWithPause(result, layer,
            new IndexMeta(correlationId, filename, timestamp, tags, properties, null),
            f, processing))
        .count()
        .toSingle();
}
Also used : Buffer(io.vertx.rxjava.core.buffer.Buffer) IndexMeta(io.georocket.storage.IndexMeta) StringWindow(io.georocket.util.StringWindow) XMLParserOperator(io.georocket.util.XMLParserOperator) ChunkMeta(io.georocket.storage.ChunkMeta) Window(io.georocket.util.Window) LoggerFactory(io.vertx.core.logging.LoggerFactory) RxStore(io.georocket.storage.RxStore) AbstractVerticle(io.vertx.rxjava.core.AbstractVerticle) Single(rx.Single) JsonParserOperator(io.georocket.util.JsonParserOperator) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Result(io.georocket.input.Splitter.Result) Map(java.util.Map) FileSystem(io.vertx.rxjava.core.file.FileSystem) JsonObject(io.vertx.core.json.JsonObject) StoreFactory(io.georocket.storage.StoreFactory) UTF8BomFilter(io.georocket.util.UTF8BomFilter) Logger(io.vertx.core.logging.Logger) Message(io.vertx.rxjava.core.eventbus.Message) XMLSplitter(io.georocket.input.xml.XMLSplitter) OpenOptions(io.vertx.core.file.OpenOptions) GeoJsonSplitter(io.georocket.input.geojson.GeoJsonSplitter) NoStackTraceThrowable(io.vertx.core.impl.NoStackTraceThrowable) FirstLevelSplitter(io.georocket.input.xml.FirstLevelSplitter) Collectors(java.util.stream.Collectors) XMLCRSIndexer(io.georocket.index.xml.XMLCRSIndexer) JsonArray(io.vertx.core.json.JsonArray) List(java.util.List) Stream(java.util.stream.Stream) MimeTypeUtils.belongsTo(io.georocket.util.MimeTypeUtils.belongsTo) ReadStream(io.vertx.rxjava.core.streams.ReadStream) AddressConstants(io.georocket.constants.AddressConstants) RxUtils(io.georocket.util.RxUtils) ConfigConstants(io.georocket.constants.ConfigConstants) Buffer(io.vertx.rxjava.core.buffer.Buffer) StringWindow(io.georocket.util.StringWindow) GeoJsonSplitter(io.georocket.input.geojson.GeoJsonSplitter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) UTF8BomFilter(io.georocket.util.UTF8BomFilter) IndexMeta(io.georocket.storage.IndexMeta) JsonParserOperator(io.georocket.util.JsonParserOperator)

Example 2 with Result

use of io.georocket.input.Splitter.Result in project georocket by georocket.

the class FirstLevelSplitterTest method oneChunk.

/**
 * Splits an XML string whose root element contains exactly one child
 * element and checks the single resulting chunk and its metadata.
 * @throws Exception if an error has occurred
 */
@Test
public void oneChunk() throws Exception {
    String contents = XMLHEADER + "<root>\n<object><child></child></object>\n</root>";

    List<Result<XMLChunkMeta>> results = split(contents);
    assertEquals(1, results.size());

    // "<root>\n" is 7 characters long and "\n</root>" is 8 characters long
    int expectedStart = XMLHEADER.length() + 7;
    int expectedEnd = contents.length() - 8;
    XMLChunkMeta expectedMeta = new XMLChunkMeta(
        Arrays.asList(new XMLStartElement("root")), expectedStart, expectedEnd);

    Result<XMLChunkMeta> onlyResult = results.get(0);
    assertEquals(expectedMeta, onlyResult.getMeta());
    // the chunk is expected to be identical to the input document
    assertEquals(contents, onlyResult.getChunk());
}
Also used : XMLStartElement(io.georocket.util.XMLStartElement) XMLChunkMeta(io.georocket.storage.XMLChunkMeta) Result(io.georocket.input.Splitter.Result) Test(org.junit.Test)

Example 3 with Result

use of io.georocket.input.Splitter.Result in project georocket by georocket.

the class FirstLevelSplitterTest method twoChunks.

/**
 * Splits an XML string whose root element contains two child elements and
 * checks both resulting chunks and their metadata.
 * @throws Exception if an error has occurred
 */
@Test
public void twoChunks() throws Exception {
    String contents = XMLHEADER + "<root><object><child></child></object>" + "<object><child2></child2></object></root>";

    List<Result<XMLChunkMeta>> results = split(contents);
    assertEquals(2, results.size());
    Result<XMLChunkMeta> first = results.get(0);
    Result<XMLChunkMeta> second = results.get(1);

    List<XMLStartElement> parents = Arrays.asList(new XMLStartElement("root"));
    // Both expected chunks (see the assertions below) begin with the header
    // followed by "<root>\n", which is 7 characters long.
    int expectedStart = XMLHEADER.length() + 7;
    // the first <object> element is 32 characters long, the second one 34
    XMLChunkMeta expectedFirstMeta = new XMLChunkMeta(parents, expectedStart, expectedStart + 32);
    XMLChunkMeta expectedSecondMeta = new XMLChunkMeta(parents, expectedStart, expectedStart + 34);

    assertEquals(expectedFirstMeta, first.getMeta());
    assertEquals(expectedSecondMeta, second.getMeta());
    assertEquals(XMLHEADER + "<root>\n<object><child></child></object>\n</root>", first.getChunk());
    assertEquals(XMLHEADER + "<root>\n<object><child2></child2></object>\n</root>", second.getChunk());
}
Also used : XMLStartElement(io.georocket.util.XMLStartElement) XMLChunkMeta(io.georocket.storage.XMLChunkMeta) Result(io.georocket.input.Splitter.Result) Test(org.junit.Test)

Example 4 with Result

use of io.georocket.input.Splitter.Result in project georocket by georocket.

the class FirstLevelSplitterTest method attributes.

/**
 * Splits an XML string with two child elements and attributes on the root
 * element and checks that both chunks carry the root's attributes.
 * @throws Exception if an error has occurred
 */
@Test
public void attributes() throws Exception {
    String root = "<root key=\"value\" key2=\"value2\">";
    String contents = XMLHEADER + root + "<object ok=\"ov\"><child></child></object>" + "<object><child2></child2></object></root>";

    List<Result<XMLChunkMeta>> results = split(contents);
    assertEquals(2, results.size());
    Result<XMLChunkMeta> first = results.get(0);
    Result<XMLChunkMeta> second = results.get(1);

    // expected parent element: root with its two attributes and their values
    XMLStartElement expectedRoot = new XMLStartElement(null, "root",
        new String[] { "", "" },
        new String[] { "key", "key2" },
        new String[] { "value", "value2" });
    List<XMLStartElement> parents = Arrays.asList(expectedRoot);

    // Both expected chunks (see the assertions below) begin with the header,
    // the root start tag and one line break.
    int expectedStart = XMLHEADER.length() + root.length() + 1;
    // the first <object> element is 40 characters long, the second one 34
    XMLChunkMeta expectedFirstMeta = new XMLChunkMeta(parents, expectedStart, expectedStart + 40);
    XMLChunkMeta expectedSecondMeta = new XMLChunkMeta(parents, expectedStart, expectedStart + 34);

    assertEquals(expectedFirstMeta, first.getMeta());
    assertEquals(expectedSecondMeta, second.getMeta());
    assertEquals(XMLHEADER + root + "\n<object ok=\"ov\"><child></child></object>\n</root>", first.getChunk());
    assertEquals(XMLHEADER + root + "\n<object><child2></child2></object>\n</root>", second.getChunk());
}
Also used : XMLStartElement(io.georocket.util.XMLStartElement) XMLChunkMeta(io.georocket.storage.XMLChunkMeta) Result(io.georocket.input.Splitter.Result) Test(org.junit.Test)

Example 5 with Result

use of io.georocket.input.Splitter.Result in project georocket by georocket.

the class FirstLevelSplitterTest method split.

/**
 * Feeds an XML string to an asynchronous XML reader and passes every
 * parser event to a {@link FirstLevelSplitter}, collecting the chunks
 * the splitter returns.
 * @param xml the XML string
 * @return the chunks created by the splitter, in the order they were produced
 * @throws Exception if the XML string could not be parsed
 */
private List<Result<XMLChunkMeta>> split(String xml) throws Exception {
    // the splitter works on a window that already holds the whole document
    Window window = new Window();
    window.append(Buffer.buffer(xml));
    FirstLevelSplitter splitter = new FirstLevelSplitter(window);

    // hand the complete document to the parser in one go
    AsyncXMLInputFactory factory = new InputFactoryImpl();
    AsyncXMLStreamReader<AsyncByteArrayFeeder> reader = factory.createAsyncForByteArray();
    byte[] bytes = xml.getBytes(StandardCharsets.UTF_8);
    AsyncByteArrayFeeder feeder = reader.getInputFeeder();
    feeder.feedInput(bytes, 0, bytes.length);

    List<Result<XMLChunkMeta>> collected = new ArrayList<>();
    while (reader.hasNext()) {
        int event = reader.next();
        if (event == AsyncXMLStreamReader.EVENT_INCOMPLETE) {
            // All input has been fed already, so there is nothing more to
            // parse; closing the reader presumably ends the loop on the next
            // hasNext() check -- confirm against the Aalto documentation.
            reader.close();
        } else {
            int offset = reader.getLocation().getCharacterOffset();
            Result<XMLChunkMeta> result = splitter.onEvent(new XMLStreamEvent(event, offset, reader));
            // onEvent may return null; such results are not collected
            if (result != null) {
                collected.add(result);
            }
        }
    }
    return collected;
}
Also used : Window(io.georocket.util.Window) XMLStreamEvent(io.georocket.util.XMLStreamEvent) ArrayList(java.util.ArrayList) AsyncByteArrayFeeder(com.fasterxml.aalto.AsyncByteArrayFeeder) InputFactoryImpl(com.fasterxml.aalto.stax.InputFactoryImpl) Result(io.georocket.input.Splitter.Result) XMLChunkMeta(io.georocket.storage.XMLChunkMeta) AsyncXMLInputFactory(com.fasterxml.aalto.AsyncXMLInputFactory)

Aggregations

Result (io.georocket.input.Splitter.Result)8 XMLChunkMeta (io.georocket.storage.XMLChunkMeta)7 XMLStartElement (io.georocket.util.XMLStartElement)6 Test (org.junit.Test)6 Window (io.georocket.util.Window)2 AsyncByteArrayFeeder (com.fasterxml.aalto.AsyncByteArrayFeeder)1 AsyncXMLInputFactory (com.fasterxml.aalto.AsyncXMLInputFactory)1 InputFactoryImpl (com.fasterxml.aalto.stax.InputFactoryImpl)1 AddressConstants (io.georocket.constants.AddressConstants)1 ConfigConstants (io.georocket.constants.ConfigConstants)1 XMLCRSIndexer (io.georocket.index.xml.XMLCRSIndexer)1 GeoJsonSplitter (io.georocket.input.geojson.GeoJsonSplitter)1 FirstLevelSplitter (io.georocket.input.xml.FirstLevelSplitter)1 XMLSplitter (io.georocket.input.xml.XMLSplitter)1 ChunkMeta (io.georocket.storage.ChunkMeta)1 IndexMeta (io.georocket.storage.IndexMeta)1 RxStore (io.georocket.storage.RxStore)1 StoreFactory (io.georocket.storage.StoreFactory)1 JsonParserOperator (io.georocket.util.JsonParserOperator)1 MimeTypeUtils.belongsTo (io.georocket.util.MimeTypeUtils.belongsTo)1