use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.
the class ITAutoCompactionTest method testAutoCompactionDutyWithOverlappingInterval.
@Test
public void testAutoCompactionDutyWithOverlappingInterval() throws Exception {
final ISOChronology chrono = ISOChronology.getInstance(DateTimes.inferTzFromString("America/Los_Angeles"));
Map<String, Object> specs = ImmutableMap.of("%%GRANULARITYSPEC%%", new UniformGranularitySpec(Granularities.WEEK, Granularities.NONE, false, ImmutableList.of(new Interval("2013-08-31/2013-09-02", chrono))));
// Create WEEK segment with 2013-08-26 to 2013-09-02
loadData(INDEX_TASK_WITH_GRANULARITY_SPEC, specs);
specs = ImmutableMap.of("%%GRANULARITYSPEC%%", new UniformGranularitySpec(Granularities.MONTH, Granularities.NONE, false, ImmutableList.of(new Interval("2013-09-01/2013-09-02", chrono))));
// Create MONTH segment with 2013-09-01 to 2013-10-01
loadData(INDEX_TASK_WITH_GRANULARITY_SPEC, specs);
try (final Closeable ignored = unloader(fullDatasourceName)) {
verifySegmentsCount(2);
// Result is not rollup
// For dim "page", result has values "Gypsy Danger" and "Striker Eureka"
Map<String, Object> expectedResult = ImmutableMap.of("%%FIELD_TO_QUERY%%", "added", "%%EXPECTED_COUNT_RESULT%%", 2, "%%EXPECTED_SCAN_RESULT%%", ImmutableList.of(ImmutableMap.of("events", ImmutableList.of(ImmutableList.of(57.0), ImmutableList.of(459.0)))));
verifyQuery(INDEX_ROLLUP_QUERIES_RESOURCE, expectedResult);
submitCompactionConfig(MAX_ROWS_PER_SEGMENT_COMPACTED, NO_SKIP_OFFSET, null, null, null, null, false);
// Compact the MONTH segment
forceTriggerAutoCompaction(2);
verifyQuery(INDEX_ROLLUP_QUERIES_RESOURCE, expectedResult);
// Compact the WEEK segment
forceTriggerAutoCompaction(2);
verifyQuery(INDEX_ROLLUP_QUERIES_RESOURCE, expectedResult);
// Verify all task succeed
List<TaskResponseObject> compactTasksBefore = indexer.getCompleteTasksForDataSource(fullDatasourceName);
for (TaskResponseObject taskResponseObject : compactTasksBefore) {
Assert.assertEquals(TaskState.SUCCESS, taskResponseObject.getStatus());
}
// Verify compacted segments does not get compacted again
forceTriggerAutoCompaction(2);
List<TaskResponseObject> compactTasksAfter = indexer.getCompleteTasksForDataSource(fullDatasourceName);
Assert.assertEquals(compactTasksAfter.size(), compactTasksBefore.size());
}
}
use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.
the class ITAutoCompactionTest method testAutoCompactionDutyWithQueryGranularity.
@Test
public void testAutoCompactionDutyWithQueryGranularity() throws Exception {
final ISOChronology chrono = ISOChronology.getInstance(DateTimes.inferTzFromString("America/Los_Angeles"));
Map<String, Object> specs = ImmutableMap.of("%%GRANULARITYSPEC%%", new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, true, ImmutableList.of(new Interval("2013-08-31/2013-09-02", chrono))));
loadData(INDEX_TASK_WITH_GRANULARITY_SPEC, specs);
try (final Closeable ignored = unloader(fullDatasourceName)) {
Map<String, Object> expectedResult = ImmutableMap.of("%%FIELD_TO_QUERY%%", "added", "%%EXPECTED_COUNT_RESULT%%", 2, "%%EXPECTED_SCAN_RESULT%%", ImmutableList.of(ImmutableMap.of("events", ImmutableList.of(ImmutableList.of(57.0), ImmutableList.of(459.0)))));
verifyQuery(INDEX_ROLLUP_QUERIES_RESOURCE, expectedResult);
submitCompactionConfig(MAX_ROWS_PER_SEGMENT_COMPACTED, NO_SKIP_OFFSET, new UserCompactionTaskGranularityConfig(null, Granularities.DAY, null), false);
forceTriggerAutoCompaction(2);
expectedResult = ImmutableMap.of("%%FIELD_TO_QUERY%%", "added", "%%EXPECTED_COUNT_RESULT%%", 1, "%%EXPECTED_SCAN_RESULT%%", ImmutableList.of(ImmutableMap.of("events", ImmutableList.of(ImmutableList.of(516.0)))));
verifyQuery(INDEX_ROLLUP_QUERIES_RESOURCE, expectedResult);
verifySegmentsCompacted(2, MAX_ROWS_PER_SEGMENT_COMPACTED);
List<TaskResponseObject> compactTasksBefore = indexer.getCompleteTasksForDataSource(fullDatasourceName);
// Verify rollup segments does not get compacted again
forceTriggerAutoCompaction(2);
List<TaskResponseObject> compactTasksAfter = indexer.getCompleteTasksForDataSource(fullDatasourceName);
Assert.assertEquals(compactTasksAfter.size(), compactTasksBefore.size());
}
}
use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.
the class KafkaIndexTaskTest method testKafkaInputFormat.
@Test(timeout = 60_000L)
public void testKafkaInputFormat() throws Exception {
// Insert data
insertData(Iterables.limit(records, 3));
final KafkaIndexTask task = createTask(null, new DataSchema("test_ds", new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(Arrays.asList(new StringDimensionSchema("dim1"), new StringDimensionSchema("dim1t"), new StringDimensionSchema("dim2"), new LongDimensionSchema("dimLong"), new FloatDimensionSchema("dimFloat"), new StringDimensionSchema("kafka.testheader.encoding"))), new AggregatorFactory[] { new DoubleSumAggregatorFactory("met1sum", "met1"), new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), null), new KafkaIndexTaskIOConfig(0, "sequence0", new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 5L)), kafkaServer.consumerProperties(), KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, null, KAFKA_INPUT_FORMAT));
Assert.assertTrue(task.supportsQueries());
final ListenableFuture<TaskStatus> future = runTask(task);
while (countEvents(task) != 3) {
Thread.sleep(25);
}
Assert.assertEquals(Status.READING, task.getRunner().getStatus());
final QuerySegmentSpec interval = OBJECT_MAPPER.readValue("\"2008/2012\"", QuerySegmentSpec.class);
List<ScanResultValue> scanResultValues = scanData(task, interval);
// verify that there are no records indexed in the rollbacked time period
Assert.assertEquals(3, Iterables.size(scanResultValues));
int i = 0;
for (ScanResultValue result : scanResultValues) {
final Map<String, Object> event = ((List<Map<String, Object>>) result.getEvents()).get(0);
Assert.assertEquals("application/json", event.get("kafka.testheader.encoding"));
Assert.assertEquals("y", event.get("dim2"));
}
// insert remaining data
insertData(Iterables.skip(records, 3));
// Wait for task to exit
Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
// Check metrics
Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getProcessed());
Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getUnparseable());
Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getThrownAway());
}
use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.
the class KafkaSupervisorTest method getDataSchema.
private static DataSchema getDataSchema(String dataSource) {
List<DimensionSchema> dimensions = new ArrayList<>();
dimensions.add(StringDimensionSchema.create("dim1"));
dimensions.add(StringDimensionSchema.create("dim2"));
return new DataSchema(dataSource, new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(dimensions), new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, ImmutableList.of()), null);
}
use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project hive by apache.
the class TestDruidRecordWriter method testWrite.
// Test is failing due to Guava dependency, Druid 0.13.0 should have less dependency on Guava
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
final String dataSourceName = "testDataSource";
final File segmentOutputDir = temporaryFolder.newFolder();
final File workingDir = temporaryFolder.newFolder();
Configuration config = new Configuration();
final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null), new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("host")), null, null)));
final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {
});
DataSchema dataSchema = new DataSchema(dataSourceName, parserMap, new AggregatorFactory[] { new LongSumAggregatorFactory("visited_sum", "visited_sum"), new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)), null, objectMapper);
IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(null, null, null, null, temporaryFolder.newFolder(), null, null, null, null, indexSpec, null, null, 0, 0, null, null, 0L, null, null);
LocalFileSystem localFileSystem = FileSystem.getLocal(config);
DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {
@Override
public File getStorageDirectory() {
return segmentOutputDir;
}
});
Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
DruidRecordWriter druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
List<DruidWritable> druidWritables = expectedRows.stream().map(input -> new DruidWritable(ImmutableMap.<String, Object>builder().putAll(input).put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, Granularities.DAY.bucketStart(new DateTime((long) input.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN))).getMillis()).build())).collect(Collectors.toList());
for (DruidWritable druidWritable : druidWritables) {
druidRecordWriter.write(druidWritable);
}
druidRecordWriter.close(false);
List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getCreatedSegments(segmentDescriptorPath, config);
Assert.assertEquals(1, dataSegmentList.size());
File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
Firehose firehose = new IngestSegmentFirehose(ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())), null, ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), null);
List<InputRow> rows = Lists.newArrayList();
while (firehose.hasMore()) {
rows.add(firehose.nextRow());
}
verifyRows(expectedRows, rows);
}
Aggregations