Search in sources:

Example 66 with TimestampSpec

use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class AvroOCFReaderTest method createReader.

/**
 * Creates an {@link InputEntityReader} over an Avro file that holds the sample datum
 * produced by {@code AvroStreamInputRowParserTest.buildSomeAvroDatum()}.
 *
 * <p>The row schema uses the {@code "timestamp"} column with the {@code "auto"} format,
 * the single default-schema dimension {@code "eventType"}, and {@code ColumnsFilter.all()}.
 *
 * @param mapper       object mapper handed to the {@code AvroOCFInputFormat} constructor
 * @param readerSchema reader schema handed to the {@code AvroOCFInputFormat} constructor
 * @return the reader created by the input format for the generated file
 * @throws Exception if building the file, the format, or the reader fails
 */
private InputEntityReader createReader(ObjectMapper mapper, Map<String, Object> readerSchema) throws Exception {
    // Materialize one sample record as a file on disk.
    final GenericRecord datum = AvroStreamInputRowParserTest.buildSomeAvroDatum();
    final File avroFile = AvroHadoopInputRowParserTest.createAvroFile(datum);

    final AvroOCFInputFormat format = new AvroOCFInputFormat(mapper, null, readerSchema, null, null);
    final InputRowSchema rowSchema = new InputRowSchema(
        new TimestampSpec("timestamp", "auto", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("eventType"))),
        ColumnsFilter.all()
    );

    // The third argument is a fresh folder obtained from the temporaryFolder field.
    return format.createReader(rowSchema, new FileEntity(avroFile), temporaryFolder.newFolder());
}
Also used : FileEntity(org.apache.druid.data.input.impl.FileEntity) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 67 with TimestampSpec

use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class GoogleCloudStorageInputSourceTest method testCompressedReader.

/**
 * Reads the two expected compressed objects (one per prefix) through a
 * {@code GoogleCloudStorageInputSource} using a {@code "|"}-delimited {@code CsvInputFormat}
 * and checks the timestamp, {@code dim1} and {@code dim2} of every row returned.
 *
 * @throws IOException if reading rows or closing the row iterator fails
 */
@Test
public void testCompressedReader() throws IOException {
    EasyMock.reset(STORAGE);
    EasyMock.reset(INPUT_DATA_CONFIG);
    addExpectedPrefixObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(0)));
    addExpectedGetCompressedObjectMock(EXPECTED_COMPRESSED_URIS.get(0));
    addExpectedPrefixObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_COMPRESSED_URIS.get(1)));
    addExpectedGetCompressedObjectMock(EXPECTED_COMPRESSED_URIS.get(1));
    EasyMock.expect(INPUT_DATA_CONFIG.getMaxListingLength()).andReturn(MAX_LISTING_LENGTH);
    EasyMock.replay(STORAGE);
    EasyMock.replay(INPUT_DATA_CONFIG);
    GoogleCloudStorageInputSource inputSource = new GoogleCloudStorageInputSource(STORAGE, INPUT_DATA_CONFIG, null, PREFIXES, null);
    InputRowSchema someSchema = new InputRowSchema(new TimestampSpec("time", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2"))), ColumnsFilter.all());
    InputSourceReader reader = inputSource.reader(someSchema, new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0), null);
    // The iterator is closeable; try-with-resources releases it even when an assertion fails.
    // NOTE(review): if the reader yields no rows, none of the assertions below run.
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        while (iterator.hasNext()) {
            InputRow nextRow = iterator.next();
            Assert.assertEquals(NOW, nextRow.getTimestamp());
            Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0));
            Assert.assertEquals("world", nextRow.getDimension("dim2").get(0));
        }
    }
}
Also used : InputSourceReader(org.apache.druid.data.input.InputSourceReader) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 68 with TimestampSpec

use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class KafkaInputFormatTest method testTimestampFromHeader.

/**
 * Appends a {@code headerTs} header with value {@code "2021-06-24"} to the sample headers,
 * builds a Kafka record entity from it, and reads it with a schema whose timestamp comes from
 * the {@code "kafka.newheader.headerTs"} column in {@code "iso"} format. Checks the payload,
 * header and key dimensions of the resulting row and that exactly one row is produced.
 *
 * @throws IOException if reading rows or closing the row iterator fails
 */
@Test
public void testTimestampFromHeader() throws IOException {
    final Iterable<Header> sampleHeaderWithTs = Iterables.unmodifiableIterable(Iterables.concat(SAMPLE_HEADERS, ImmutableList.of(new Header() {

        @Override
        public String key() {
            return "headerTs";
        }

        @Override
        public byte[] value() {
            return "2021-06-24".getBytes(StandardCharsets.UTF_8);
        }
    })));
    final byte[] key = StringUtils.toUtf8("{\n" + "    \"key\": \"sampleKey\"\n" + "}");
    final byte[] payload = StringUtils.toUtf8("{\n" + "    \"timestamp\": \"2021-06-24\",\n" + "    \"bar\": null,\n" + "    \"foo\": \"x\",\n" + "    \"baz\": 4,\n" + "    \"o\": {\n" + "        \"mg\": 1\n" + "    }\n" + "}");
    final Headers headers = new RecordHeaders(sampleHeaderWithTs);
    inputEntity = new KafkaRecordEntity(new ConsumerRecord<byte[], byte[]>("sample", 0, 0, timestamp, null, null, 0, 0, key, payload, headers));
    final InputEntityReader reader = format.createReader(
        new InputRowSchema(
            new TimestampSpec("kafka.newheader.headerTs", "iso", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo", "kafka.newheader.encoding", "kafka.newheader.kafkapkc"))),
            ColumnsFilter.all()
        ),
        inputEntity,
        null
    );
    final int numExpectedIterations = 1;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        int numActualIterations = 0;
        while (iterator.hasNext()) {
            final InputRow row = iterator.next();
            // The row must be a MapBasedInputRow; this replaces an otherwise unused cast.
            Assert.assertTrue(row instanceof MapBasedInputRow);
            // Payload verifications
            Assert.assertEquals(DateTimes.of("2021-06-24"), row.getTimestamp());
            Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
            // NOTE(review): root_*/path_*/jq_* are presumably flattened fields configured on
            // the shared `format` fixture, which is defined outside this method - confirm.
            Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
            Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));
            // Header verification
            Assert.assertEquals("application/json", Iterables.getOnlyElement(row.getDimension("kafka.newheader.encoding")));
            Assert.assertEquals("pkc-bar", Iterables.getOnlyElement(row.getDimension("kafka.newheader.kafkapkc")));
            Assert.assertEquals(String.valueOf(DateTimes.of("2021-06-24").getMillis()), Iterables.getOnlyElement(row.getDimension("kafka.newts.timestamp")));
            Assert.assertEquals("2021-06-24", Iterables.getOnlyElement(row.getDimension("kafka.newheader.headerTs")));
            Assert.assertEquals("2021-06-24", Iterables.getOnlyElement(row.getDimension("timestamp")));
            // Key verification
            Assert.assertEquals("sampleKey", Iterables.getOnlyElement(row.getDimension("kafka.newkey.key")));
            // These dimensions must come back empty.
            Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
            Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
            Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
            numActualIterations++;
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used : KafkaRecordEntity(org.apache.druid.data.input.kafka.KafkaRecordEntity) Headers(org.apache.kafka.common.header.Headers) RecordHeaders(org.apache.kafka.common.header.internals.RecordHeaders) InputEntityReader(org.apache.druid.data.input.InputEntityReader) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) RecordHeaders(org.apache.kafka.common.header.internals.RecordHeaders) Header(org.apache.kafka.common.header.Header) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) Test(org.junit.Test)

Example 69 with TimestampSpec

use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class KafkaIndexTaskTest method testKafkaInputFormat.

/**
 * Runs a {@code KafkaIndexTask} configured with {@code KAFKA_INPUT_FORMAT}: inserts the first
 * three records, waits until the task reports three events, checks the task status and the
 * scanned events (including the {@code kafka.testheader.encoding} dimension), then inserts the
 * remaining records and verifies the task succeeds with the expected ingestion metrics.
 *
 * @throws Exception if inserting data, running the task, or querying it fails
 */
@Test(timeout = 60_000L)
public void testKafkaInputFormat() throws Exception {
    // Insert data
    insertData(Iterables.limit(records, 3));
    final KafkaIndexTask task = createTask(
        null,
        new DataSchema(
            "test_ds",
            new TimestampSpec("timestamp", "iso", null),
            new DimensionsSpec(Arrays.asList(
                new StringDimensionSchema("dim1"),
                new StringDimensionSchema("dim1t"),
                new StringDimensionSchema("dim2"),
                new LongDimensionSchema("dimLong"),
                new FloatDimensionSchema("dimFloat"),
                new StringDimensionSchema("kafka.testheader.encoding")
            )),
            new AggregatorFactory[] { new DoubleSumAggregatorFactory("met1sum", "met1"), new CountAggregatorFactory("rows") },
            new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null),
            null
        ),
        new KafkaIndexTaskIOConfig(
            0,
            "sequence0",
            new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 0L), ImmutableSet.of()),
            new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 5L)),
            kafkaServer.consumerProperties(),
            KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
            true,
            null,
            null,
            KAFKA_INPUT_FORMAT
        )
    );
    Assert.assertTrue(task.supportsQueries());
    final ListenableFuture<TaskStatus> future = runTask(task);
    // Poll until the task has seen the three inserted records; the @Test timeout bounds the wait.
    while (countEvents(task) != 3) {
        Thread.sleep(25);
    }
    Assert.assertEquals(Status.READING, task.getRunner().getStatus());
    final QuerySegmentSpec interval = OBJECT_MAPPER.readValue("\"2008/2012\"", QuerySegmentSpec.class);
    List<ScanResultValue> scanResultValues = scanData(task, interval);
    // verify that the scan over the interval returns exactly three results
    Assert.assertEquals(3, Iterables.size(scanResultValues));
    for (ScanResultValue result : scanResultValues) {
        // Only the first event of each scan result is inspected.
        final Map<String, Object> event = ((List<Map<String, Object>>) result.getEvents()).get(0);
        Assert.assertEquals("application/json", event.get("kafka.testheader.encoding"));
        Assert.assertEquals("y", event.get("dim2"));
    }
    // insert remaining data
    insertData(Iterables.skip(records, 3));
    // Wait for task to exit
    Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
    // Check metrics
    Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getProcessed());
    Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getUnparseable());
    Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getThrownAway());
}
Also used : DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TaskStatus(org.apache.druid.indexer.TaskStatus) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) ScanResultValue(org.apache.druid.query.scan.ScanResultValue) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) Test(org.junit.Test) IndexTaskTest(org.apache.druid.indexing.common.task.IndexTaskTest)

Example 70 with TimestampSpec

use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class KafkaSupervisorTest method getDataSchema.

/**
 * Builds the {@link DataSchema} for the given data source: an {@code "iso"} timestamp read from
 * the {@code "timestamp"} column, string dimensions {@code dim1} and {@code dim2}, a single
 * {@code "rows"} count aggregator, and a uniform granularity spec of
 * {@code Granularities.HOUR} / {@code Granularities.NONE} with an empty third argument.
 *
 * @param dataSource name passed as the first argument of the schema
 * @return the assembled schema
 */
private static DataSchema getDataSchema(String dataSource) {
    final List<DimensionSchema> dimensionSchemas = new ArrayList<>();
    for (String dimensionName : new String[] { "dim1", "dim2" }) {
        dimensionSchemas.add(StringDimensionSchema.create(dimensionName));
    }

    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "iso", null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(dimensionSchemas);
    final AggregatorFactory[] aggregators = new AggregatorFactory[] { new CountAggregatorFactory("rows") };
    final UniformGranularitySpec granularitySpec =
        new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, ImmutableList.of());

    return new DataSchema(dataSource, timestampSpec, dimensionsSpec, aggregators, granularitySpec, null);
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) ArrayList(java.util.ArrayList) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema)

Aggregations

- TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 154
- DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 113
- Test (org.junit.Test): 110
- DataSchema (org.apache.druid.segment.indexing.DataSchema): 49
- InputRow (org.apache.druid.data.input.InputRow): 47
- InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 41
- UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 39
- InputRowSchema (org.apache.druid.data.input.InputRowSchema): 37
- InputEntityReader (org.apache.druid.data.input.InputEntityReader): 33
- AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 32
- JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 30
- JSONParseSpec (org.apache.druid.data.input.impl.JSONParseSpec): 29
- ArrayList (java.util.ArrayList): 28
- CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat): 28
- StringInputRowParser (org.apache.druid.data.input.impl.StringInputRowParser): 27
- LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 27
- JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec): 25
- InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues): 21
- DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory): 21
- ArbitraryGranularitySpec (org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec): 20