Example 16 with StringDimensionSchema

Use of org.apache.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.

The class MaterializedViewSupervisorSpecTest, method testEmptyBaseDataSource.

@Test
public void testEmptyBaseDataSource() {
    expectedException.expect(CoreMatchers.instanceOf(IllegalArgumentException.class));
    expectedException.expectMessage("baseDataSource cannot be null or empty. Please provide a baseDataSource.");
    // noinspection ResultOfObjectAllocationIgnored (the constructor itself is expected to throw)
    new MaterializedViewSupervisorSpec(
            "",
            new DimensionsSpec(Lists.newArrayList(
                    new StringDimensionSchema("isUnpatrolled"),
                    new StringDimensionSchema("metroCode"),
                    new StringDimensionSchema("namespace"),
                    new StringDimensionSchema("page"),
                    new StringDimensionSchema("regionIsoCode"),
                    new StringDimensionSchema("regionName"),
                    new StringDimensionSchema("user"))),
            new AggregatorFactory[] {
                    new CountAggregatorFactory("count"),
                    new LongSumAggregatorFactory("added", "added")
            },
            HadoopTuningConfig.makeDefaultTuningConfig(),
            null, null, null, null, null,
            false,
            objectMapper,
            null, null, null, null, null,
            new MaterializedViewTaskConfig(),
            EasyMock.createMock(AuthorizerMapper.class),
            new NoopChatHandlerProvider(),
            new SupervisorStateManagerConfig());
}
Also used : CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) NoopChatHandlerProvider(org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider) SupervisorStateManagerConfig(org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) AuthorizerMapper(org.apache.druid.server.security.AuthorizerMapper) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) Test(org.junit.Test)

Example 17 with StringDimensionSchema

Use of org.apache.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.

The class MaterializedViewSupervisorSpecTest, method testSupervisorSerialization.

@Test
public void testSupervisorSerialization() throws IOException {
    String supervisorStr = "{\n"
            + "  \"type\" : \"derivativeDataSource\",\n"
            + "  \"baseDataSource\": \"wikiticker\",\n"
            + "  \"dimensionsSpec\":{\n"
            + "            \"dimensions\" : [\n"
            + "              \"isUnpatrolled\",\n"
            + "              \"metroCode\",\n"
            + "              \"namespace\",\n"
            + "              \"page\",\n"
            + "              \"regionIsoCode\",\n"
            + "              \"regionName\",\n"
            + "              \"user\"\n"
            + "            ]\n"
            + "          },\n"
            + "    \"metricsSpec\" : [\n"
            + "        {\n"
            + "          \"name\" : \"count\",\n"
            + "          \"type\" : \"count\"\n"
            + "        },\n"
            + "        {\n"
            + "          \"name\" : \"added\",\n"
            + "          \"type\" : \"longSum\",\n"
            + "          \"fieldName\" : \"added\"\n"
            + "        }\n"
            + "      ],\n"
            + "  \"tuningConfig\": {\n"
            + "      \"type\" : \"hadoop\"\n"
            + "  }\n"
            + "}";
    MaterializedViewSupervisorSpec expected = new MaterializedViewSupervisorSpec(
            "wikiticker",
            new DimensionsSpec(Lists.newArrayList(
                    new StringDimensionSchema("isUnpatrolled"),
                    new StringDimensionSchema("metroCode"),
                    new StringDimensionSchema("namespace"),
                    new StringDimensionSchema("page"),
                    new StringDimensionSchema("regionIsoCode"),
                    new StringDimensionSchema("regionName"),
                    new StringDimensionSchema("user"))),
            new AggregatorFactory[] {
                    new CountAggregatorFactory("count"),
                    new LongSumAggregatorFactory("added", "added")
            },
            HadoopTuningConfig.makeDefaultTuningConfig(),
            null, null, null, null, null,
            false,
            objectMapper,
            null, null, null, null, null,
            new MaterializedViewTaskConfig(),
            EasyMock.createMock(AuthorizerMapper.class),
            new NoopChatHandlerProvider(),
            new SupervisorStateManagerConfig());
    MaterializedViewSupervisorSpec spec = objectMapper.readValue(supervisorStr, MaterializedViewSupervisorSpec.class);
    Assert.assertEquals(expected.getBaseDataSource(), spec.getBaseDataSource());
    Assert.assertEquals(expected.getId(), spec.getId());
    Assert.assertEquals(expected.getDataSourceName(), spec.getDataSourceName());
    Assert.assertEquals(expected.getDimensions(), spec.getDimensions());
    Assert.assertEquals(expected.getMetrics(), spec.getMetrics());
}
Also used : CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) NoopChatHandlerProvider(org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider) SupervisorStateManagerConfig(org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) AuthorizerMapper(org.apache.druid.server.security.AuthorizerMapper) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) Test(org.junit.Test)
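
Worth noting: the JSON spec above lists its dimensions as bare strings, while the expected object uses explicit StringDimensionSchema instances. The two forms are equivalent because Druid's DimensionsSpec deserializer treats a bare string as shorthand for a string-typed dimension with default handling. A minimal sketch of that equivalence (assuming an ObjectMapper with Druid's core modules registered, as in the test above):

DimensionsSpec fromJson = objectMapper.readValue(
        "{\"dimensions\": [\"page\", \"user\"]}",
        DimensionsSpec.class);
// Bare strings deserialize to string-typed dimension schemas.
Assert.assertEquals(
        Lists.newArrayList(new StringDimensionSchema("page"), new StringDimensionSchema("user")),
        fromJson.getDimensions());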

Example 18 with StringDimensionSchema

Use of org.apache.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.

The class KafkaIndexTaskTest, method testKafkaInputFormat.

@Test(timeout = 60_000L)
public void testKafkaInputFormat() throws Exception {
    // Insert data
    insertData(Iterables.limit(records, 3));
    final KafkaIndexTask task = createTask(
            null,
            new DataSchema(
                    "test_ds",
                    new TimestampSpec("timestamp", "iso", null),
                    new DimensionsSpec(Arrays.asList(
                            new StringDimensionSchema("dim1"),
                            new StringDimensionSchema("dim1t"),
                            new StringDimensionSchema("dim2"),
                            new LongDimensionSchema("dimLong"),
                            new FloatDimensionSchema("dimFloat"),
                            new StringDimensionSchema("kafka.testheader.encoding"))),
                    new AggregatorFactory[] {
                            new DoubleSumAggregatorFactory("met1sum", "met1"),
                            new CountAggregatorFactory("rows")
                    },
                    new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null),
                    null),
            new KafkaIndexTaskIOConfig(
                    0,
                    "sequence0",
                    new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 0L), ImmutableSet.of()),
                    new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 5L)),
                    kafkaServer.consumerProperties(),
                    KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
                    true,
                    null,
                    null,
                    KAFKA_INPUT_FORMAT));
    Assert.assertTrue(task.supportsQueries());
    final ListenableFuture<TaskStatus> future = runTask(task);
    while (countEvents(task) != 3) {
        Thread.sleep(25);
    }
    Assert.assertEquals(Status.READING, task.getRunner().getStatus());
    final QuerySegmentSpec interval = OBJECT_MAPPER.readValue("\"2008/2012\"", QuerySegmentSpec.class);
    List<ScanResultValue> scanResultValues = scanData(task, interval);
    // verify that all three inserted records were indexed
    Assert.assertEquals(3, Iterables.size(scanResultValues));
    for (ScanResultValue result : scanResultValues) {
        final Map<String, Object> event = ((List<Map<String, Object>>) result.getEvents()).get(0);
        Assert.assertEquals("application/json", event.get("kafka.testheader.encoding"));
        Assert.assertEquals("y", event.get("dim2"));
    }
    // insert remaining data
    insertData(Iterables.skip(records, 3));
    // Wait for task to exit
    Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
    // Check metrics
    Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getProcessed());
    Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getUnparseable());
    Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getThrownAway());
}
Also used : DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TaskStatus(org.apache.druid.indexer.TaskStatus) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) ScanResultValue(org.apache.druid.query.scan.ScanResultValue) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) Test(org.junit.Test) IndexTaskTest(org.apache.druid.indexing.common.task.IndexTaskTest)
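
The dimensions in this schema mix types: the schema class chosen for each column (StringDimensionSchema, LongDimensionSchema, FloatDimensionSchema) fixes how that column's values are coerced and stored, and kafka.testheader.encoding is populated from a Kafka record header by the task's configured KAFKA_INPUT_FORMAT rather than from the record body. A minimal sketch of a mixed-type spec on its own, mirroring the constructors used above:

// Each dimension schema class determines the column's value type at ingestion time.
DimensionsSpec mixedDims = new DimensionsSpec(Arrays.asList(
        new StringDimensionSchema("dim1"),      // string column
        new LongDimensionSchema("dimLong"),     // long column
        new FloatDimensionSchema("dimFloat"))); // float column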

Example 19 with StringDimensionSchema

Use of org.apache.druid.data.input.impl.StringDimensionSchema in project hive by apache.

The class TestDruidRecordWriter, method testWrite.

// The test is failing due to a Guava dependency conflict; Druid 0.13.0 should depend less on Guava
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
    final String dataSourceName = "testDataSource";
    final File segmentOutputDir = temporaryFolder.newFolder();
    final File workingDir = temporaryFolder.newFolder();
    Configuration config = new Configuration();
    final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(
            new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
            new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("host")), null, null)));
    final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {
    });
    DataSchema dataSchema = new DataSchema(
            dataSourceName,
            parserMap,
            new AggregatorFactory[] {
                    new LongSumAggregatorFactory("visited_sum", "visited_sum"),
                    new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts")
            },
            new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
            null,
            objectMapper);
    IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
            null, null, null, null,
            temporaryFolder.newFolder(),
            null, null, null, null,
            indexSpec,
            null, null, 0, 0, null, null, 0L, null, null);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {

        @Override
        public File getStorageDirectory() {
            return segmentOutputDir;
        }
    });
    Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
    DruidRecordWriter druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
    List<DruidWritable> druidWritables = expectedRows.stream()
            .map(input -> new DruidWritable(ImmutableMap.<String, Object>builder()
                    .putAll(input)
                    .put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
                            Granularities.DAY.bucketStart(
                                    new DateTime((long) input.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN))).getMillis())
                    .build()))
            .collect(Collectors.toList());
    for (DruidWritable druidWritable : druidWritables) {
        druidRecordWriter.write(druidWritable);
    }
    druidRecordWriter.close(false);
    List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getCreatedSegments(segmentDescriptorPath, config);
    Assert.assertEquals(1, dataSegmentList.size());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
    final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
    QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
    Firehose firehose = new IngestSegmentFirehose(
            ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())),
            null,
            ImmutableList.of("host"),
            ImmutableList.of("visited_sum", "unique_hosts"),
            null);
    List<InputRow> rows = Lists.newArrayList();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    verifyRows(expectedRows, rows);
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) DruidConstants(org.apache.hadoop.hive.druid.conf.DruidConstants) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DruidRecordWriter(org.apache.hadoop.hive.druid.io.DruidRecordWriter) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) TypeReference(com.fasterxml.jackson.core.type.TypeReference) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) ImmutableMap(com.google.common.collect.ImmutableMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) QueryableIndex(org.apache.druid.segment.QueryableIndex) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) Collectors(java.util.stream.Collectors) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) InputRow(org.apache.druid.data.input.InputRow) Firehose(org.apache.druid.data.input.Firehose) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DataSegment(org.apache.druid.timeline.DataSegment) DruidTable(org.apache.calcite.adapter.druid.DruidTable) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) DruidStorageHandlerUtils(org.apache.hadoop.hive.druid.DruidStorageHandlerUtils) Constants(org.apache.hadoop.hive.conf.Constants) DruidStorageHandler(org.apache.hadoop.hive.druid.DruidStorageHandler) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) File(java.io.File) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Rule(org.junit.Rule) Ignore(org.junit.Ignore) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) TemporaryFolder(org.junit.rules.TemporaryFolder)

Example 20 with StringDimensionSchema

Use of org.apache.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.

The class ProtobufParserBenchmark, method setup.

@Setup
public void setup() {
    nestedParseSpec = new JSONParseSpec(
            new TimestampSpec("timestamp", "iso", null),
            new DimensionsSpec(Lists.newArrayList(
                    new StringDimensionSchema("event"),
                    new StringDimensionSchema("id"),
                    new StringDimensionSchema("someOtherId"),
                    new StringDimensionSchema("isValid"))),
            new JSONPathSpec(true, Lists.newArrayList(
                    new JSONPathFieldSpec(JSONPathFieldType.ROOT, "eventType", "eventType"),
                    new JSONPathFieldSpec(JSONPathFieldType.PATH, "foobar", "$.foo.bar"),
                    new JSONPathFieldSpec(JSONPathFieldType.PATH, "bar0", "$.bar[0].bar"))),
            null,
            null);
    flatParseSpec = new JSONParseSpec(
            new TimestampSpec("timestamp", "iso", null),
            new DimensionsSpec(Lists.newArrayList(
                    new StringDimensionSchema("event"),
                    new StringDimensionSchema("id"),
                    new StringDimensionSchema("someOtherId"),
                    new StringDimensionSchema("isValid"))),
            null,
            null,
            null);
    decoder = new FileBasedProtobufBytesDecoder("prototest.desc", "ProtoTestEvent");
    protoFilePath = "ProtoFile";
    protoInputs = getProtoInputs(protoFilePath);
    nestedParser = new ProtobufInputRowParser(nestedParseSpec, decoder, null, null);
    flatParser = new ProtobufInputRowParser(flatParseSpec, decoder, null, null);
}
Also used : ProtobufInputRowParser(org.apache.druid.data.input.protobuf.ProtobufInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) FileBasedProtobufBytesDecoder(org.apache.druid.data.input.protobuf.FileBasedProtobufBytesDecoder) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) Setup(org.openjdk.jmh.annotations.Setup)
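
The difference between the two parse specs is the flattening step: the JSONPathSpec passed to nestedParseSpec maps nested fields to top-level columns, while flatParseSpec passes null and reads only top-level keys. In a JSONPathFieldSpec, a ROOT field copies a top-level key verbatim and a PATH field evaluates a JsonPath expression against the record. A minimal sketch, with comments on what each field yields:

JSONPathSpec flattenSpec = new JSONPathSpec(
        true, // useFieldDiscovery: auto-discover remaining top-level fields
        Lists.newArrayList(
                // copies the top-level "eventType" key into a column named "eventType"
                new JSONPathFieldSpec(JSONPathFieldType.ROOT, "eventType", "eventType"),
                // evaluates $.foo.bar and exposes the result as column "foobar"
                new JSONPathFieldSpec(JSONPathFieldType.PATH, "foobar", "$.foo.bar"),
                // array indexing works too: $.bar[0].bar becomes column "bar0"
                new JSONPathFieldSpec(JSONPathFieldType.PATH, "bar0", "$.bar[0].bar")));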

Aggregations

StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema): 36
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 30
Test (org.junit.Test): 24
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 19
LongDimensionSchema (org.apache.druid.data.input.impl.LongDimensionSchema): 15
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 15
FloatDimensionSchema (org.apache.druid.data.input.impl.FloatDimensionSchema): 14
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 12
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 11
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 11
File (java.io.File): 8
ArrayList (java.util.ArrayList): 8
Before (org.junit.Before): 8
ImmutableList (com.google.common.collect.ImmutableList): 7
HashMap (java.util.HashMap): 7
DataSchema (org.apache.druid.segment.indexing.DataSchema): 7
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 7
List (java.util.List): 6
SupervisorStateManagerConfig (org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig): 6
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 5
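
Taken together, the examples above all follow the same pattern: StringDimensionSchema instances (optionally mixed with long and float schemas) are collected into a DimensionsSpec, which is then paired with a TimestampSpec, aggregators, and a granularity spec inside a DataSchema. A minimal, consolidated sketch of that pattern, using the same constructors as Example 18 (the datasource and column names here are illustrative):

DataSchema schema = new DataSchema(
        "example_ds",
        new TimestampSpec("timestamp", "iso", null),
        new DimensionsSpec(Arrays.asList(
                new StringDimensionSchema("page"),
                new StringDimensionSchema("user"))),
        new AggregatorFactory[] { new CountAggregatorFactory("count") },
        new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null),
        null);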