Use of org.apache.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.
From the class MaterializedViewSupervisorSpecTest, method testEmptyBaseDataSource.
@Test
public void testEmptyBaseDataSource() {
  expectedException.expect(CoreMatchers.instanceOf(IllegalArgumentException.class));
  expectedException.expectMessage("baseDataSource cannot be null or empty. Please provide a baseDataSource.");
  // noinspection ResultOfObjectAllocationIgnored (constructing the spec triggers the expected exception)
  new MaterializedViewSupervisorSpec(
      "",
      new DimensionsSpec(
          Lists.newArrayList(
              new StringDimensionSchema("isUnpatrolled"),
              new StringDimensionSchema("metroCode"),
              new StringDimensionSchema("namespace"),
              new StringDimensionSchema("page"),
              new StringDimensionSchema("regionIsoCode"),
              new StringDimensionSchema("regionName"),
              new StringDimensionSchema("user")
          )
      ),
      new AggregatorFactory[] {
          new CountAggregatorFactory("count"),
          new LongSumAggregatorFactory("added", "added")
      },
      HadoopTuningConfig.makeDefaultTuningConfig(),
      null, null, null, null, null,
      false,
      objectMapper,
      null, null, null, null, null,
      new MaterializedViewTaskConfig(),
      EasyMock.createMock(AuthorizerMapper.class),
      new NoopChatHandlerProvider(),
      new SupervisorStateManagerConfig()
  );
}
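The same seven-dimension list reappears verbatim in the serialization test below. As a minimal sketch (the helper class and method are hypothetical, not part of the test), the StringDimensionSchema usage can be pulled out on its own:

import java.util.Arrays;
import java.util.List;

import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.StringDimensionSchema;

// Hypothetical helper; the tests build this list inline with Lists.newArrayList.
public final class WikitickerDimensions
{
  static DimensionsSpec spec()
  {
    List<DimensionSchema> dims = Arrays.<DimensionSchema>asList(
        new StringDimensionSchema("isUnpatrolled"),
        new StringDimensionSchema("metroCode"),
        new StringDimensionSchema("namespace"),
        new StringDimensionSchema("page"),
        new StringDimensionSchema("regionIsoCode"),
        new StringDimensionSchema("regionName"),
        new StringDimensionSchema("user")
    );
    // Single-argument constructor: no dimension exclusions, no spatial dimensions.
    return new DimensionsSpec(dims);
  }
}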
Use of org.apache.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.
From the class MaterializedViewSupervisorSpecTest, method testSupervisorSerialization.
@Test
public void testSupervisorSerialization() throws IOException {
  String supervisorStr = "{\n"
      + " \"type\" : \"derivativeDataSource\",\n"
      + " \"baseDataSource\": \"wikiticker\",\n"
      + " \"dimensionsSpec\":{\n"
      + " \"dimensions\" : [\n"
      + " \"isUnpatrolled\",\n"
      + " \"metroCode\",\n"
      + " \"namespace\",\n"
      + " \"page\",\n"
      + " \"regionIsoCode\",\n"
      + " \"regionName\",\n"
      + " \"user\"\n"
      + " ]\n"
      + " },\n"
      + " \"metricsSpec\" : [\n"
      + " {\n"
      + " \"name\" : \"count\",\n"
      + " \"type\" : \"count\"\n"
      + " },\n"
      + " {\n"
      + " \"name\" : \"added\",\n"
      + " \"type\" : \"longSum\",\n"
      + " \"fieldName\" : \"added\"\n"
      + " }\n"
      + " ],\n"
      + " \"tuningConfig\": {\n"
      + " \"type\" : \"hadoop\"\n"
      + " }\n"
      + "}";
  MaterializedViewSupervisorSpec expected = new MaterializedViewSupervisorSpec(
      "wikiticker",
      new DimensionsSpec(
          Lists.newArrayList(
              new StringDimensionSchema("isUnpatrolled"),
              new StringDimensionSchema("metroCode"),
              new StringDimensionSchema("namespace"),
              new StringDimensionSchema("page"),
              new StringDimensionSchema("regionIsoCode"),
              new StringDimensionSchema("regionName"),
              new StringDimensionSchema("user")
          )
      ),
      new AggregatorFactory[] {
          new CountAggregatorFactory("count"),
          new LongSumAggregatorFactory("added", "added")
      },
      HadoopTuningConfig.makeDefaultTuningConfig(),
      null, null, null, null, null,
      false,
      objectMapper,
      null, null, null, null, null,
      new MaterializedViewTaskConfig(),
      EasyMock.createMock(AuthorizerMapper.class),
      new NoopChatHandlerProvider(),
      new SupervisorStateManagerConfig()
  );
  MaterializedViewSupervisorSpec spec = objectMapper.readValue(supervisorStr, MaterializedViewSupervisorSpec.class);
  Assert.assertEquals(expected.getBaseDataSource(), spec.getBaseDataSource());
  Assert.assertEquals(expected.getId(), spec.getId());
  Assert.assertEquals(expected.getDataSourceName(), spec.getDataSourceName());
  Assert.assertEquals(expected.getDimensions(), spec.getDimensions());
  Assert.assertEquals(expected.getMetrics(), spec.getMetrics());
}
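The dimension assertions above pass because plain string entries under "dimensions" in an ingestion spec default to string dimensions, i.e. they deserialize to the same StringDimensionSchema objects that the expected spec builds explicitly. A minimal round-trip sketch, assuming Druid's DefaultObjectMapper stands in for the test's injected objectMapper:

import java.util.Arrays;

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.StringDimensionSchema;
import org.apache.druid.jackson.DefaultObjectMapper;

public final class DimensionsSpecRoundTrip
{
  public static void main(String[] args) throws Exception
  {
    ObjectMapper mapper = new DefaultObjectMapper();
    DimensionsSpec original = new DimensionsSpec(
        Arrays.<DimensionSchema>asList(
            new StringDimensionSchema("isUnpatrolled"),
            new StringDimensionSchema("page"),
            new StringDimensionSchema("user")
        )
    );
    // Serialize and read back; the parsed spec should equal the original.
    String json = mapper.writeValueAsString(original);
    DimensionsSpec parsed = mapper.readValue(json, DimensionsSpec.class);
    System.out.println(original.equals(parsed)); // expected: true
  }
}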
Use of org.apache.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.
From the class KafkaIndexTaskTest, method testKafkaInputFormat.
@Test(timeout = 60_000L)
public void testKafkaInputFormat() throws Exception {
  // Insert the first three records
  insertData(Iterables.limit(records, 3));
  final KafkaIndexTask task = createTask(
      null,
      new DataSchema(
          "test_ds",
          new TimestampSpec("timestamp", "iso", null),
          new DimensionsSpec(
              Arrays.asList(
                  new StringDimensionSchema("dim1"),
                  new StringDimensionSchema("dim1t"),
                  new StringDimensionSchema("dim2"),
                  new LongDimensionSchema("dimLong"),
                  new FloatDimensionSchema("dimFloat"),
                  new StringDimensionSchema("kafka.testheader.encoding")
              )
          ),
          new AggregatorFactory[] {
              new DoubleSumAggregatorFactory("met1sum", "met1"),
              new CountAggregatorFactory("rows")
          },
          new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null),
          null
      ),
      new KafkaIndexTaskIOConfig(
          0,
          "sequence0",
          new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 0L), ImmutableSet.of()),
          new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 5L)),
          kafkaServer.consumerProperties(),
          KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
          true,
          null,
          null,
          KAFKA_INPUT_FORMAT
      )
  );
  Assert.assertTrue(task.supportsQueries());
  final ListenableFuture<TaskStatus> future = runTask(task);
  while (countEvents(task) != 3) {
    Thread.sleep(25);
  }
  Assert.assertEquals(Status.READING, task.getRunner().getStatus());
  final QuerySegmentSpec interval = OBJECT_MAPPER.readValue("\"2008/2012\"", QuerySegmentSpec.class);
  List<ScanResultValue> scanResultValues = scanData(task, interval);
  // All three records inserted so far should be queryable, with the Kafka header
  // exposed as the "kafka.testheader.encoding" dimension.
  Assert.assertEquals(3, Iterables.size(scanResultValues));
  for (ScanResultValue result : scanResultValues) {
    final Map<String, Object> event = ((List<Map<String, Object>>) result.getEvents()).get(0);
    Assert.assertEquals("application/json", event.get("kafka.testheader.encoding"));
    Assert.assertEquals("y", event.get("dim2"));
  }
  // Insert the remaining data
  insertData(Iterables.skip(records, 3));
  // Wait for the task to exit
  Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
  // Check metrics
  Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getProcessed());
  Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getUnparseable());
  Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getThrownAway());
}
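A minimal sketch of the mixed-type dimension list used above (the helper class is hypothetical): string columns, including the header-derived "kafka.testheader.encoding", use StringDimensionSchema, while the numeric columns use the Long and Float schema variants:

import java.util.Arrays;

import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.FloatDimensionSchema;
import org.apache.druid.data.input.impl.LongDimensionSchema;
import org.apache.druid.data.input.impl.StringDimensionSchema;

// Hypothetical helper mirroring the inline DimensionsSpec in the task's DataSchema.
public final class KafkaTestDimensions
{
  static DimensionsSpec spec()
  {
    return new DimensionsSpec(
        Arrays.<DimensionSchema>asList(
            new StringDimensionSchema("dim1"),
            new StringDimensionSchema("dim1t"),
            new StringDimensionSchema("dim2"),
            new LongDimensionSchema("dimLong"),
            new FloatDimensionSchema("dimFloat"),
            // the Kafka header value is exposed as an ordinary string dimension
            new StringDimensionSchema("kafka.testheader.encoding")
        )
    );
  }
}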
Use of org.apache.druid.data.input.impl.StringDimensionSchema in project hive by apache.
From the class TestDruidRecordWriter, method testWrite.
// This test is failing due to a Guava dependency conflict; Druid 0.13.0 should depend less on Guava.
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
  final String dataSourceName = "testDataSource";
  final File segmentOutputDir = temporaryFolder.newFolder();
  final File workingDir = temporaryFolder.newFolder();
  Configuration config = new Configuration();
  final InputRowParser inputRowParser = new MapInputRowParser(
      new TimeAndDimsParseSpec(
          new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
          new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("host")), null, null)
      )
  );
  final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {
  });
  DataSchema dataSchema = new DataSchema(
      dataSourceName,
      parserMap,
      new AggregatorFactory[] {
          new LongSumAggregatorFactory("visited_sum", "visited_sum"),
          new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts")
      },
      new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
      null,
      objectMapper
  );
  IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
  RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
      null, null, null, null,
      temporaryFolder.newFolder(),
      null, null, null, null,
      indexSpec,
      null, null,
      0, 0,
      null, null,
      0L,
      null, null
  );
  LocalFileSystem localFileSystem = FileSystem.getLocal(config);
  DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {

    @Override
    public File getStorageDirectory() {
      return segmentOutputDir;
    }
  });
  Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
  DruidRecordWriter druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
  List<DruidWritable> druidWritables = expectedRows.stream()
      .map(input -> new DruidWritable(
          ImmutableMap.<String, Object>builder()
              .putAll(input)
              .put(
                  Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
                  Granularities.DAY.bucketStart(new DateTime((long) input.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN))).getMillis()
              )
              .build()
      ))
      .collect(Collectors.toList());
  for (DruidWritable druidWritable : druidWritables) {
    druidRecordWriter.write(druidWritable);
  }
  druidRecordWriter.close(false);
  List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getCreatedSegments(segmentDescriptorPath, config);
  Assert.assertEquals(1, dataSegmentList.size());
  File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
  new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
  final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
  QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
  Firehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())),
      null,
      ImmutableList.of("host"),
      ImmutableList.of("visited_sum", "unique_hosts"),
      null
  );
  List<InputRow> rows = Lists.newArrayList();
  while (firehose.hasMore()) {
    rows.add(firehose.nextRow());
  }
  verifyRows(expectedRows, rows);
}
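The test converts the typed parser into a Map because the legacy DataSchema constructor used here takes the parser as a Map. A standalone sketch of that conversion, assuming Druid's DefaultObjectMapper in place of the test's objectMapper and "__time" in place of DruidConstants.DEFAULT_TIMESTAMP_COLUMN:

import java.util.Map;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.InputRowParser;
import org.apache.druid.data.input.impl.MapInputRowParser;
import org.apache.druid.data.input.impl.StringDimensionSchema;
import org.apache.druid.data.input.impl.TimeAndDimsParseSpec;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.jackson.DefaultObjectMapper;

public final class ParserMapSketch
{
  public static void main(String[] args)
  {
    ObjectMapper mapper = new DefaultObjectMapper();
    // "__time" is a stand-in for DruidConstants.DEFAULT_TIMESTAMP_COLUMN.
    InputRowParser parser = new MapInputRowParser(
        new TimeAndDimsParseSpec(
            new TimestampSpec("__time", "auto", null),
            new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("host")), null, null)
        )
    );
    // The legacy DataSchema constructor accepts the parser as a Map, hence the conversion.
    Map<String, Object> parserMap =
        mapper.convertValue(parser, new TypeReference<Map<String, Object>>() {});
    System.out.println(parserMap);
  }
}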
Use of org.apache.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.
From the class ProtobufParserBenchmark, method setup.
@Setup
public void setup() {
  nestedParseSpec = new JSONParseSpec(
      new TimestampSpec("timestamp", "iso", null),
      new DimensionsSpec(
          Lists.newArrayList(
              new StringDimensionSchema("event"),
              new StringDimensionSchema("id"),
              new StringDimensionSchema("someOtherId"),
              new StringDimensionSchema("isValid")
          )
      ),
      new JSONPathSpec(
          true,
          Lists.newArrayList(
              new JSONPathFieldSpec(JSONPathFieldType.ROOT, "eventType", "eventType"),
              new JSONPathFieldSpec(JSONPathFieldType.PATH, "foobar", "$.foo.bar"),
              new JSONPathFieldSpec(JSONPathFieldType.PATH, "bar0", "$.bar[0].bar")
          )
      ),
      null,
      null
  );
  flatParseSpec = new JSONParseSpec(
      new TimestampSpec("timestamp", "iso", null),
      new DimensionsSpec(
          Lists.newArrayList(
              new StringDimensionSchema("event"),
              new StringDimensionSchema("id"),
              new StringDimensionSchema("someOtherId"),
              new StringDimensionSchema("isValid")
          )
      ),
      null,
      null,
      null
  );
  decoder = new FileBasedProtobufBytesDecoder("prototest.desc", "ProtoTestEvent");
  protoFilePath = "ProtoFile";
  protoInputs = getProtoInputs(protoFilePath);
  nestedParser = new ProtobufInputRowParser(nestedParseSpec, decoder, null, null);
  flatParser = new ProtobufInputRowParser(flatParseSpec, decoder, null, null);
}
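A hypothetical usage fragment (not part of the benchmark class) showing how the parsers built in setup() are typically exercised; serializedEvent is a placeholder for a serialized ProtoTestEvent byte array:

// Wrap a serialized ProtoTestEvent (placeholder byte[]) and parse it.
ByteBuffer buf = ByteBuffer.wrap(serializedEvent);
List<InputRow> rows = nestedParser.parseBatch(buf);
for (InputRow row : rows) {
  // Flattened string columns such as "event" come back as regular dimensions.
  System.out.println(row.getDimension("event"));
}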