Search in sources :

Example 31 with InputRowSchema

use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

the class GoogleCloudStorageInputSourceTest method testCompressedReader.

@Test
public void testCompressedReader() throws IOException {
    EasyMock.reset(STORAGE);
    EasyMock.reset(INPUT_DATA_CONFIG);
    addExpectedPrefixObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(0)));
    addExpectedGetCompressedObjectMock(EXPECTED_COMPRESSED_URIS.get(0));
    addExpectedPrefixObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(1)));
    addExpectedGetCompressedObjectMock(EXPECTED_COMPRESSED_URIS.get(1));
    EasyMock.expect(INPUT_DATA_CONFIG.getMaxListingLength()).andReturn(MAX_LISTING_LENGTH);
    EasyMock.replay(STORAGE);
    EasyMock.replay(INPUT_DATA_CONFIG);
    GoogleCloudStorageInputSource inputSource = new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG, null, PREFIXES, null);
    InputRowSchema someSchema = new InputRowSchema(new TimestampSpec("time", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))), ColumnsFilter.all());
    InputSourceReader reader = inputSource.reader(someSchema, new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0), null);
    CloseableIterator<InputRow> iterator = reader.read();
    while (iterator.hasNext()) {
        InputRow nextRow = iterator.next();
        Assert.assertEquals(NOW, nextRow.getTimestamp());
        Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
        Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
    }
}
Also used : InputSourceReader(org.apache.druid.data.input.InputSourceReader) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 32 with InputRowSchema

use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

the class KafkaInputFormatTest method testTimestampFromHeader.

@Test
public void testTimestampFromHeader() throws IOException {
    Iterable<Header> sample_header_with_ts = Iterables.unmodifiableIterable(Iterables.concat(SAMPLE_HEADERS, ImmutableList.of(new Header() {

        @Override
        public String key() {
            return "headerTs";
        }

        @Override
        public byte[] value() {
            return "2021-06-24".getBytes(StandardCharsets.UTF_8);
        }
    })));
    final byte[] key = StringUtils.toUtf8("{\n" + "    \"key\": \"sampleKey\"\n" + "}");
    final byte[] payload = StringUtils.toUtf8("{\n" + "    \"timestamp\": \"2021-06-24\",\n" + "    \"bar\": null,\n" + "    \"foo\": \"x\",\n" + "    \"baz\": 4,\n" + "    \"o\": {\n" + "        \"mg\": 1\n" + "    }\n" + "}");
    Headers headers = new RecordHeaders(sample_header_with_ts);
    inputEntity = new KafkaRecordEntity(new ConsumerRecord<byte[], byte[]>("sample", 0, 0, timestamp, null, null, 0, 0, key, payload, headers));
    final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("kafka.newheader.headerTs", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo", "kafka.newheader.encoding", "kafka.newheader.kafkapkc"))), ColumnsFilter.all()), inputEntity, null);
    final int numExpectedIterations = 1;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int numActualIterations = 0;
        while (iterator.hasNext()) {
            final InputRow row = iterator.next();
            final MapBasedInputRow mrow = (MapBasedInputRow) row;
            // Payload verifications
            Assert.assertEquals(DateTimes.of("2021-06-24"), row.getTimestamp());
            Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));
            // Header verification
            Assert.assertEquals("application/json", Iterables.getOnlyElement(row.getDimension("kafka.newheader.encoding")));
            Assert.assertEquals("pkc-bar", Iterables.getOnlyElement(row.getDimension("kafka.newheader.kafkapkc")));
            Assert.assertEquals(String.valueOf(DateTimes.of("2021-06-24").getMillis()), Iterables.getOnlyElement(row.getDimension("kafka.newts.timestamp")));
            Assert.assertEquals("2021-06-24", Iterables.getOnlyElement(row.getDimension("kafka.newheader.headerTs")));
            Assert.assertEquals("2021-06-24", Iterables.getOnlyElement(row.getDimension("timestamp")));
            // Key verification
            Assert.assertEquals("sampleKey", Iterables.getOnlyElement(row.getDimension("kafka.newkey.key")));
            Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
            Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
            Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
            Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
            numActualIterations++;
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used : KafkaRecordEntity(org.apache.druid.data.input.kafka.KafkaRecordEntity) Headers(org.apache.kafka.common.header.Headers) RecordHeaders(org.apache.kafka.common.header.internals.RecordHeaders) InputEntityReader(org.apache.druid.data.input.InputEntityReader) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) RecordHeaders(org.apache.kafka.common.header.internals.RecordHeaders) Header(org.apache.kafka.common.header.Header) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) Test(org.junit.Test)

Example 33 with InputRowSchema

use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

the class KafkaInputFormat method createReader.

@Override
public InputEntityReader createReader(InputRowSchema inputRowSchema, InputEntity source, File temporaryDirectory) {
    KafkaRecordEntity record = (KafkaRecordEntity) source;
    InputRowSchema newInputRowSchema = new InputRowSchema(dummyTimestampSpec, inputRowSchema.getDimensionsSpec(), inputRowSchema.getColumnsFilter());
    return new KafkaInputReader(inputRowSchema, record, (headerFormat == null) ? null : headerFormat.createReader(record.getRecord().headers(), headerColumnPrefix), (keyFormat == null || record.getRecord().key() == null) ? null : keyFormat.createReader(newInputRowSchema, new ByteEntity(record.getRecord().key()), temporaryDirectory), (record.getRecord().value() == null) ? null : valueFormat.createReader(newInputRowSchema, source, temporaryDirectory), keyColumnName, timestampColumnName);
}
Also used : KafkaRecordEntity(org.apache.druid.data.input.kafka.KafkaRecordEntity) ByteEntity(org.apache.druid.data.input.impl.ByteEntity) InputRowSchema(org.apache.druid.data.input.InputRowSchema)

Example 34 with InputRowSchema

use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

the class JsonLineReaderTest method testFalseKeepNullColumns.

@Test
public void testFalseKeepNullColumns() throws IOException {
    final JsonInputFormat format = new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"))), null, false);
    final ByteEntity source = new ByteEntity(StringUtils.toUtf8("{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"o\":{\"mg\":\"a\"}}"));
    final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Collections.emptyList())), ColumnsFilter.all()), source, null);
    final int numExpectedIterations = 1;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int numActualIterations = 0;
        while (iterator.hasNext()) {
            final InputRow row = iterator.next();
            Assert.assertEquals(Arrays.asList("path_omg", "timestamp", "foo"), row.getDimensions());
            Assert.assertTrue(row.getDimension("bar").isEmpty());
            Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
            Assert.assertEquals("a", Iterables.getOnlyElement(row.getDimension("path_omg")));
            numActualIterations++;
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used : InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Example 35 with InputRowSchema

use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.

the class JsonLineReaderTest method testParseRow.

@Test
public void testParseRow() throws IOException {
    final JsonInputFormat format = new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"))), null, null);
    final ByteEntity source = new ByteEntity(StringUtils.toUtf8("{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":1}}"));
    final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), ColumnsFilter.all()), source, null);
    final int numExpectedIterations = 1;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int numActualIterations = 0;
        while (iterator.hasNext()) {
            final InputRow row = iterator.next();
            Assert.assertEquals(DateTimes.of("2019-01-01"), row.getTimestamp());
            Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));
            Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
            Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
            Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
            numActualIterations++;
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used : InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Aggregations

InputRowSchema (org.apache.druid.data.input.InputRowSchema)63 Test (org.junit.Test)55 InputRow (org.apache.druid.data.input.InputRow)52 InputEntityReader (org.apache.druid.data.input.InputEntityReader)39 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)37 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)36 JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec)29 JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec)26 InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues)24 InputSourceReader (org.apache.druid.data.input.InputSourceReader)10 ByteEntity (org.apache.druid.data.input.impl.ByteEntity)9 CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat)9 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)9 File (java.io.File)7 KafkaRecordEntity (org.apache.druid.data.input.kafka.KafkaRecordEntity)5 ArrayList (java.util.ArrayList)4 Collections (java.util.Collections)4 List (java.util.List)4 Map (java.util.Map)4 Nullable (javax.annotation.Nullable)4