Search in sources:

Example 16 with InputEntityReader

use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

the class JsonReaderTest method testEmptyJSONText.

/**
 * Feeds a zero-length UTF-8 payload to a reader created from a {@code JsonInputFormat}
 * and registers {@code ParseException} as the expected outcome of the test.
 */
@Test
public void testEmptyJSONText() throws IOException {
    // Flatten spec covering ROOT, PATH and JQ field types.
    final ImmutableList<JSONPathFieldSpec> flattenFields = ImmutableList.of(
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
        new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
        new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
    );
    final JSONPathSpec flattenSpec = new JSONPathSpec(true, flattenFields);
    // The trailing `false` is passed to make sure JsonReader is used.
    final JsonInputFormat format = new JsonInputFormat(flattenSpec, null, null, false);

    // The input is an empty string, i.e. zero bytes.
    final ByteEntity source = new ByteEntity(StringUtils.toUtf8(""));

    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "iso", null);
    final DimensionsSpec dimensionsSpec =
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo")));
    final InputRowSchema schema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());
    final InputEntityReader reader = format.createReader(schema, source, null);

    // From here on the test passes only if a ParseException is thrown.
    expectedException.expect(ParseException.class);

    // No row is expected to be produced from an empty input.
    final int expectedRows = 0;
    int rowsRead = 0;
    try (CloseableIterator<InputRow> rows = reader.read()) {
        for (; rows.hasNext(); ++rowsRead) {
            rows.next();
        }
        // Only reached when iteration finishes without throwing.
        Assert.assertEquals(expectedRows, rowsRead);
    }
}
Also used : InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Example 17 with InputEntityReader

use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

the class JsonReaderTest method testInvalidJSONText.

/**
 * Feeds three concatenated JSON objects, the second of which has a malformed
 * {@code baz} value, to a reader created from a {@code JsonInputFormat} and
 * registers {@code ParseException} as the expected outcome of the test.
 */
@Test
public void testInvalidJSONText() throws IOException {
    // Flatten spec covering ROOT, PATH and JQ field types.
    final ImmutableList<JSONPathFieldSpec> flattenFields = ImmutableList.of(
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
        new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
        new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
    );
    final JSONPathSpec flattenSpec = new JSONPathSpec(true, flattenFields);
    // The trailing `false` is passed to make sure JsonReader is used.
    final JsonInputFormat format = new JsonInputFormat(flattenSpec, null, null, false);

    // Three objects back to back with no separator; the middle one carries
    // the illegal token `4xxx` as its baz property.
    final String payload =
        "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":1}}"
        + "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4xxx,\"o\":{\"mg\":2}}"
        + "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":3}}";
    final ByteEntity source = new ByteEntity(StringUtils.toUtf8(payload));

    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "iso", null);
    final DimensionsSpec dimensionsSpec =
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo")));
    final InputRowSchema schema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());
    final InputEntityReader reader = format.createReader(schema, source, null);

    // From here on the test passes only if a ParseException is thrown.
    expectedException.expect(ParseException.class);

    // Because the second object is ill-formed, parsing of the whole chunk is
    // expected to abort, so no row should be counted.
    final int expectedRows = 0;
    int rowsRead = 0;
    try (CloseableIterator<InputRow> rows = reader.read()) {
        for (; rows.hasNext(); ++rowsRead) {
            rows.next();
        }
        // Only reached when iteration finishes without throwing.
        Assert.assertEquals(expectedRows, rowsRead);
    }
}
Also used : InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Example 18 with InputEntityReader

use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

the class CsvReaderTest method assertResult.

/**
 * Reads every row from {@code source} using {@code format} and checks that exactly
 * three rows come back, where the k-th row (1-based) has timestamp
 * {@code 2019-01-01T00:00:<k*10>Z}, a single {@code name} value {@code name_<k>}
 * and a single {@code score} value {@code k*5}.
 *
 * @param source entity holding the CSV bytes to read
 * @param format CSV format used to create the reader
 * @throws IOException declared for reading the entity and closing the iterator
 */
private void assertResult(ByteEntity source, CsvInputFormat format) throws IOException {
    final InputEntityReader reader = format.createReader(INPUT_ROW_SCHEMA, source, null);
    int rowCount = 0;
    try (CloseableIterator<InputRow> rows = reader.read()) {
        while (rows.hasNext()) {
            final InputRow current = rows.next();
            // 1-based position of the row being verified.
            final int ordinal = rowCount + 1;

            final String expectedTimestamp = StringUtils.format("2019-01-01T00:00:%02dZ", ordinal * 10);
            final String expectedName = StringUtils.format("name_%d", ordinal);
            final String expectedScore = Integer.toString(ordinal * 5);

            Assert.assertEquals(DateTimes.of(expectedTimestamp), current.getTimestamp());
            Assert.assertEquals(expectedName, Iterables.getOnlyElement(current.getDimension("name")));
            Assert.assertEquals(expectedScore, Iterables.getOnlyElement(current.getDimension("score")));

            rowCount = ordinal;
        }
        Assert.assertEquals(3, rowCount);
    }
}
Also used : MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) InputEntityReader(org.apache.druid.data.input.InputEntityReader)

Example 19 with InputEntityReader

use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

the class CsvReaderTest method testMultiValues.

/**
 * Reads three CSV rows whose {@code score} column holds two values joined by
 * {@code |}, using a {@code CsvInputFormat} constructed with {@code "|"} as its
 * second argument, and checks that each row exposes both score values as a list.
 */
@Test
public void testMultiValues() throws IOException {
    final ByteEntity source = writeData(
        ImmutableList.of(
            "ts,name,score",
            "2019-01-01T00:00:10Z,name_1,5|1",
            "2019-01-01T00:00:20Z,name_2,10|2",
            "2019-01-01T00:00:30Z,name_3,15|3"
        )
    );
    final CsvInputFormat format = new CsvInputFormat(ImmutableList.of(), "|", null, true, 0);
    final InputEntityReader reader = format.createReader(INPUT_ROW_SCHEMA, source, null);

    int rowCount = 0;
    try (CloseableIterator<InputRow> rows = reader.read()) {
        while (rows.hasNext()) {
            final InputRow current = rows.next();
            // 1-based position of the row being verified.
            final int ordinal = rowCount + 1;

            final String expectedTimestamp = StringUtils.format("2019-01-01T00:00:%02dZ", ordinal * 10);
            final String expectedName = StringUtils.format("name_%d", ordinal);

            Assert.assertEquals(DateTimes.of(expectedTimestamp), current.getTimestamp());
            Assert.assertEquals(expectedName, Iterables.getOnlyElement(current.getDimension("name")));
            // The score dimension is expected to carry both values, in input order.
            Assert.assertEquals(
                ImmutableList.of(Integer.toString(ordinal * 5), Integer.toString(ordinal)),
                current.getDimension("score")
            );

            rowCount = ordinal;
        }
        Assert.assertEquals(3, rowCount);
    }
}
Also used : MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 20 with InputEntityReader

use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

the class CsvReaderTest method testRussianTextMess.

/**
 * Reads a single header-less CSV row whose third field is a quoted Cyrillic
 * sentence containing backslashes and doubled quotes, and checks the parsed
 * timestamp, {@code name} and {@code Comment} values.
 */
@Test
public void testRussianTextMess() throws IOException {
    // Raw CSV line: the quoted third field contains `\""` sequences.
    final String csvLine = "2019-01-01T00:00:10Z,name_1,\"Как говорится: \\\"\"всё течет, всё изменяется\\\"\". Украина как всегда обвиняет Россию в собственных проблемах. #ПровокацияКиева\"";
    // Value the Comment dimension is expected to hold after parsing.
    final String expectedComment = "Как говорится: \\\"всё течет, всё изменяется\\\". Украина как всегда обвиняет Россию в собственных проблемах. #ПровокацияКиева";

    final ByteEntity source = writeData(ImmutableList.of(csvLine));
    // Column names are supplied explicitly to the format.
    final CsvInputFormat format = new CsvInputFormat(ImmutableList.of("ts", "name", "Comment"), null, null, false, 0);
    final InputEntityReader reader = format.createReader(INPUT_ROW_SCHEMA, source, null);

    try (CloseableIterator<InputRow> rows = reader.read()) {
        Assert.assertTrue(rows.hasNext());
        final InputRow onlyRow = rows.next();

        Assert.assertEquals(DateTimes.of("2019-01-01T00:00:10Z"), onlyRow.getTimestamp());
        Assert.assertEquals("name_1", Iterables.getOnlyElement(onlyRow.getDimension("name")));
        Assert.assertEquals(expectedComment, Iterables.getOnlyElement(onlyRow.getDimension("Comment")));

        // Exactly one row must have been produced.
        Assert.assertFalse(rows.hasNext());
    }
}
Also used : MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Aggregations

InputEntityReader (org.apache.druid.data.input.InputEntityReader)58 Test (org.junit.Test)56 InputRow (org.apache.druid.data.input.InputRow)54 InputRowSchema (org.apache.druid.data.input.InputRowSchema)39 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)33 JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec)33 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)31 JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec)28 InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues)26 MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow)8 Configuration (org.apache.hadoop.conf.Configuration)8 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)5 AvroHadoopInputRowParserTest (org.apache.druid.data.input.AvroHadoopInputRowParserTest)5 AvroStreamInputRowParserTest (org.apache.druid.data.input.AvroStreamInputRowParserTest)5 DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper)5 KafkaRecordEntity (org.apache.druid.data.input.kafka.KafkaRecordEntity)4 ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord)4 Headers (org.apache.kafka.common.header.Headers)4 RecordHeaders (org.apache.kafka.common.header.internals.RecordHeaders)4 BigDecimal (java.math.BigDecimal)3