Example 26 with InputRowSchema

Use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

From the class JsonReaderTest, method testParsePrettyFormatJSON:

@Test
public void testParsePrettyFormatJSON() throws IOException {
    final JsonInputFormat format = new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"))), null, null, /* make sure JsonReader is used */ false);
    final ByteEntity source = new ByteEntity(StringUtils.toUtf8("{\n" + "    \"timestamp\": \"2019-01-01\",\n" + "    \"bar\": null,\n" + "    \"foo\": \"x\",\n" + "    \"baz\": 4,\n" + "    \"o\": {\n" + "        \"mg\": 1\n" + "    }\n" + "}"));
    final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), ColumnsFilter.all()), source, null);
    final int numExpectedIterations = 1;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int numActualIterations = 0;
        while (iterator.hasNext()) {
            final InputRow row = iterator.next();
            Assert.assertEquals(DateTimes.of("2019-01-01"), row.getTimestamp());
            Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));
            Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
            Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
            Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
            numActualIterations++;
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used: InputRow(org.apache.druid.data.input.InputRow), JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec), JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec), InputEntityReader(org.apache.druid.data.input.InputEntityReader), InputRowSchema(org.apache.druid.data.input.InputRowSchema), Test(org.junit.Test)
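As a side note grounded in the aggregation counts at the end of this page (InputRowListPlusRawValues appears in 24 of these examples), InputEntityReader exposes a sample() method alongside read(). A hypothetical sampling variant of the loop above, reusing the same reader and sketched here rather than taken from JsonReaderTest:

    try (CloseableIterator<InputRowListPlusRawValues> sampler = reader.sample()) {
        while (sampler.hasNext()) {
            final InputRowListPlusRawValues sampled = sampler.next();
            // assumption: the single pretty-printed JSON object above yields exactly one sampled row
            Assert.assertEquals(1, sampled.getInputRows().size());
        }
    }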

Example 27 with InputRowSchema

Use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

From the class JsonReaderTest, method testEmptyJSONText:

@Test
public void testEmptyJSONText() throws IOException {
    final JsonInputFormat format = new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"))), null, null, /* make sure JsonReader is used */ false);
    // input is empty
    final ByteEntity source = new ByteEntity(StringUtils.toUtf8(""));
    final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), ColumnsFilter.all()), source, null);
    // expect a ParseException on the following `next` call on iterator
    expectedException.expect(ParseException.class);
    // parsing the empty text throws a ParseException, so no rows are returned
    final int numExpectedIterations = 0;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int numActualIterations = 0;
        while (iterator.hasNext()) {
            iterator.next();
            ++numActualIterations;
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used: InputRow(org.apache.druid.data.input.InputRow), JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec), JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec), InputEntityReader(org.apache.druid.data.input.InputEntityReader), InputRowSchema(org.apache.druid.data.input.InputRowSchema), Test(org.junit.Test)

Example 28 with InputRowSchema

Use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

From the class JsonReaderTest, method testInvalidJSONText:

@Test
public void testInvalidJSONText() throws IOException {
    final JsonInputFormat format = new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"))), null, null, /* make sure JsonReader is used */ false);
    final ByteEntity source = new ByteEntity(StringUtils.toUtf8("{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":1}}"
        // the "baz" value in the 2nd row (4xxx) is ill-formed
        + "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4xxx,\"o\":{\"mg\":2}}"
        + "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":3}}"));
    final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), ColumnsFilter.all()), source, null);
    // expect a ParseException on the following `next` call on iterator
    expectedException.expect(ParseException.class);
    // the 2nd line is ill-formed, so the parse of this text chunk aborts
    final int numExpectedIterations = 0;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int numActualIterations = 0;
        while (iterator.hasNext()) {
            iterator.next();
            ++numActualIterations;
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used: InputRow(org.apache.druid.data.input.InputRow), JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec), JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec), InputEntityReader(org.apache.druid.data.input.InputEntityReader), InputRowSchema(org.apache.druid.data.input.InputRowSchema), Test(org.junit.Test)

Example 29 with InputRowSchema

Use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

From the class OssInputSourceTest, method testReader:

@Test
public void testReader() throws IOException {
    EasyMock.reset(OSSCLIENT);
    expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
    expectListObjects(EXPECTED_URIS.get(1), ImmutableList.of(EXPECTED_URIS.get(1)), CONTENT);
    expectGetObject(EXPECTED_URIS.get(0));
    expectGetObject(EXPECTED_URIS.get(1));
    EasyMock.replay(OSSCLIENT);
    OssInputSource inputSource = new OssInputSource(OSSCLIENT, INPUT_DATA_CONFIG, null, ImmutableList.of(PREFIXES.get(0), EXPECTED_URIS.get(1)), null, null);
    InputRowSchema someSchema = new InputRowSchema(new TimestampSpec("time", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))), ColumnsFilter.all());
    InputSourceReader reader = inputSource.reader(someSchema, new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0), temporaryFolder.newFolder());
    CloseableIterator<InputRow> iterator = reader.read();
    while (iterator.hasNext()) {
        InputRow nextRow = iterator.next();
        Assert.assertEquals(NOW, nextRow.getTimestamp());
        Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
        Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
    }
    EasyMock.verify(OSSCLIENT);
}
Also used: InputSourceReader(org.apache.druid.data.input.InputSourceReader), TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec), InputRow(org.apache.druid.data.input.InputRow), DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec), CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat), InputRowSchema(org.apache.druid.data.input.InputRowSchema), InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest), Test(org.junit.Test)

Example 30 with InputRowSchema

Use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

From the class AvroOCFReaderTest, method createReader:

private InputEntityReader createReader(ObjectMapper mapper, Map<String, Object> readerSchema) throws Exception {
    final GenericRecord someAvroDatum = AvroStreamInputRowParserTest.buildSomeAvroDatum();
    final File someAvroFile = AvroHadoopInputRowParserTest.createAvroFile(someAvroDatum);
    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("eventType")));
    final AvroOCFInputFormat inputFormat = new AvroOCFInputFormat(mapper, null, readerSchema, null, null);
    final InputRowSchema schema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());
    final FileEntity entity = new FileEntity(someAvroFile);
    return inputFormat.createReader(schema, entity, temporaryFolder.newFolder());
}
Also used: FileEntity(org.apache.druid.data.input.impl.FileEntity), TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec), DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec), InputRowSchema(org.apache.druid.data.input.InputRowSchema), GenericRecord(org.apache.avro.generic.GenericRecord), File(java.io.File)
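A hypothetical caller of this helper could consume the returned reader the same way the JSON examples above do. In the sketch below, mapper stands for whatever ObjectMapper the test class configures, and the null reader schema is an assumption rather than something taken from the example:

    try (CloseableIterator<InputRow> iterator = createReader(mapper, null).read()) {
        while (iterator.hasNext()) {
            final InputRow row = iterator.next();
            // assumption: each row reports the single dimension declared in the DimensionsSpec above
            Assert.assertEquals(Collections.singletonList("eventType"), row.getDimensions());
        }
    }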

Aggregations

InputRowSchema (org.apache.druid.data.input.InputRowSchema): 63
Test (org.junit.Test): 55
InputRow (org.apache.druid.data.input.InputRow): 52
InputEntityReader (org.apache.druid.data.input.InputEntityReader): 39
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 37
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 36
JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 29
JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec): 26
InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues): 24
InputSourceReader (org.apache.druid.data.input.InputSourceReader): 10
ByteEntity (org.apache.druid.data.input.impl.ByteEntity): 9
CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat): 9
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 9
File (java.io.File): 7
KafkaRecordEntity (org.apache.druid.data.input.kafka.KafkaRecordEntity): 5
ArrayList (java.util.ArrayList): 4
Collections (java.util.Collections): 4
List (java.util.List): 4
Map (java.util.Map): 4
Nullable (javax.annotation.Nullable): 4
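The counts above point at one recurring pattern: an InputRowSchema assembled from a TimestampSpec, a DimensionsSpec, and ColumnsFilter.all(). A minimal, self-contained sketch of that pattern, with placeholder column names rather than values from any specific example:

    import com.google.common.collect.ImmutableList;
    import org.apache.druid.data.input.ColumnsFilter;
    import org.apache.druid.data.input.InputRowSchema;
    import org.apache.druid.data.input.impl.DimensionsSpec;
    import org.apache.druid.data.input.impl.TimestampSpec;

    // "timestamp", "dim1" and "dim2" are placeholder column names
    final InputRowSchema schema = new InputRowSchema(
        new TimestampSpec("timestamp", "auto", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))),
        ColumnsFilter.all()
    );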