Use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project hive by apache.
The class DruidStorageHandlerUtils, method getGranularitySpec:
public static GranularitySpec getGranularitySpec(Configuration configuration, Properties tableProperties) {
final String segmentGranularity =
    tableProperties.getProperty(Constants.DRUID_SEGMENT_GRANULARITY) != null
        ? tableProperties.getProperty(Constants.DRUID_SEGMENT_GRANULARITY)
        : HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_DRUID_INDEXING_GRANULARITY);
final boolean rollup =
    tableProperties.getProperty(DruidConstants.DRUID_ROLLUP) != null
        ? Boolean.parseBoolean(tableProperties.getProperty(DruidConstants.DRUID_ROLLUP))
        : HiveConf.getBoolVar(configuration, HiveConf.ConfVars.HIVE_DRUID_ROLLUP);
return new UniformGranularitySpec(
    Granularity.fromString(segmentGranularity),
    Granularity.fromString(
        tableProperties.getProperty(DruidConstants.DRUID_QUERY_GRANULARITY) == null
            ? "NONE"
            : tableProperties.getProperty(DruidConstants.DRUID_QUERY_GRANULARITY)),
    rollup,
    null);
}
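A minimal sketch of how this method might be exercised, assuming illustrative property values ("DAY", "NONE", "true") and a bare Hadoop Configuration; since all three Druid properties are set on the table, the HiveConf fallbacks are never consulted:
Properties tableProperties = new Properties();
tableProperties.setProperty(Constants.DRUID_SEGMENT_GRANULARITY, "DAY");
tableProperties.setProperty(DruidConstants.DRUID_QUERY_GRANULARITY, "NONE");
tableProperties.setProperty(DruidConstants.DRUID_ROLLUP, "true");
// Resulting spec is equivalent to:
// new UniformGranularitySpec(Granularity.fromString("DAY"), Granularity.fromString("NONE"), true, null)
GranularitySpec spec = DruidStorageHandlerUtils.getGranularitySpec(new Configuration(), tableProperties);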
Use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.
The class PartialHashSegmentGenerateTaskTest, method testCreateHashPartitionAnalysisFromPartitionsSpecWithNumShardsReturningAnalysisOfValidNumBuckets:
@Test
public void testCreateHashPartitionAnalysisFromPartitionsSpecWithNumShardsReturningAnalysisOfValidNumBuckets() {
final List<Interval> intervals = ImmutableList.of(Intervals.of("2020-01-01/2020-01-02"), Intervals.of("2020-01-02/2020-01-03"), Intervals.of("2020-01-03/2020-01-04"));
final int expectedNumBuckets = 5;
final HashPartitionAnalysis partitionAnalysis = PartialHashSegmentGenerateTask.createHashPartitionAnalysisFromPartitionsSpec(
    new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, intervals),
    new HashedPartitionsSpec(null, expectedNumBuckets, null),
    null);
Assert.assertEquals(intervals.size(), partitionAnalysis.getNumTimePartitions());
for (Interval interval : intervals) {
Assert.assertEquals(expectedNumBuckets, partitionAnalysis.getBucketAnalysis(interval).intValue());
}
}
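The analysis ends up with one time partition per interval because DAY segment granularity buckets the input day by day. A minimal sketch of that bucketing, assuming a recent Druid release where GranularitySpec exposes sortedBucketIntervals() (older releases expose bucketIntervals() instead):
GranularitySpec spec = new UniformGranularitySpec(
    Granularities.DAY,
    Granularities.NONE,
    Collections.singletonList(Intervals.of("2020-01-01/2020-01-04")));
// Prints the three one-day buckets 2020-01-01/02, 01-02/03 and 01-03/04,
// matching the three time partitions counted above.
for (Interval bucket : spec.sortedBucketIntervals()) {
    System.out.println(bucket);
}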
Use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.
The class HashPartitionTaskKillTest, method createTestTask:
private ParallelIndexSupervisorTask createTestTask(
    @Nullable TimestampSpec timestampSpec, @Nullable DimensionsSpec dimensionsSpec,
    @Nullable InputFormat inputFormat, @Nullable ParseSpec parseSpec,
    Interval interval, File inputDir, String filter, PartitionsSpec partitionsSpec,
    int maxNumConcurrentSubTasks, boolean appendToExisting, boolean useInputFormatApi,
    int succeedsBeforeFailing) {
GranularitySpec granularitySpec = new UniformGranularitySpec(SEGMENT_GRANULARITY, Granularities.MINUTE, interval == null ? null : Collections.singletonList(interval));
ParallelIndexTuningConfig tuningConfig = newTuningConfig(partitionsSpec, maxNumConcurrentSubTasks, !appendToExisting);
final ParallelIndexIngestionSpec ingestionSpec;
if (useInputFormatApi) {
Preconditions.checkArgument(parseSpec == null);
ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(null, new LocalInputSource(inputDir, filter), inputFormat, appendToExisting, null);
ingestionSpec = new ParallelIndexIngestionSpec(
    new DataSchema(DATASOURCE, timestampSpec, dimensionsSpec,
        new AggregatorFactory[] { new LongSumAggregatorFactory("val", "val") },
        granularitySpec, null),
    ioConfig, tuningConfig);
} else {
Preconditions.checkArgument(inputFormat == null);
ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(new LocalFirehoseFactory(inputDir, filter, null), appendToExisting);
// noinspection unchecked
ingestionSpec = new ParallelIndexIngestionSpec(
    new DataSchema("dataSource",
        getObjectMapper().convertValue(new StringInputRowParser(parseSpec, null), Map.class),
        new AggregatorFactory[] { new LongSumAggregatorFactory("val", "val") },
        granularitySpec, null, getObjectMapper()),
    ioConfig, tuningConfig);
}
return new ParallelIndexSupervisorTaskTest(null, null, null, ingestionSpec, null, Collections.emptyMap(), succeedsBeforeFailing);
}
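A note on the granularity spec built above: when interval is null the spec carries no input intervals, so segment buckets are derived from the timestamps of the ingested rows rather than from a fixed list. A small sketch of the two variants; the HOUR granularity is only a stand-in for the test's SEGMENT_GRANULARITY constant, whose value is not shown here:
// Explicit intervals: only rows falling inside 2017-12-24/25 are bucketed.
GranularitySpec explicitIntervals = new UniformGranularitySpec(
    Granularities.HOUR, Granularities.MINUTE,
    Collections.singletonList(Intervals.of("2017-12-24/2017-12-25")));
// No intervals: buckets are derived from the ingested data itself.
GranularitySpec derivedFromData = new UniformGranularitySpec(
    Granularities.HOUR, Granularities.MINUTE, null);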
Use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.
The class SinglePhaseParallelIndexingTest, method testIngestBothExplicitAndImplicitDims:
@Test
public void testIngestBothExplicitAndImplicitDims() throws IOException {
final Interval interval = Intervals.of("2017-12/P1M");
for (int i = 0; i < 5; i++) {
try (final Writer writer = Files.newBufferedWriter(new File(inputDir, "test_" + i + ".json").toPath(), StandardCharsets.UTF_8)) {
writer.write(getObjectMapper().writeValueAsString(ImmutableMap.of("ts", StringUtils.format("2017-12-%d", 24 + i), "implicitDim", "implicit_" + i, "explicitDim", "explicit_" + i)));
writer.write(getObjectMapper().writeValueAsString(ImmutableMap.of("ts", StringUtils.format("2017-12-%d", 25 + i), "implicitDim", "implicit_" + i, "explicitDim", "explicit_" + i)));
}
}
final ParallelIndexSupervisorTask task = new ParallelIndexSupervisorTask(
    null, null, null,
    new ParallelIndexIngestionSpec(
        new DataSchema(
            "dataSource",
            DEFAULT_TIMESTAMP_SPEC,
            DimensionsSpec.builder()
                .setDefaultSchemaDimensions(ImmutableList.of("ts", "explicitDim"))
                .setIncludeAllDimensions(true)
                .build(),
            new AggregatorFactory[] { new CountAggregatorFactory("cnt") },
            new UniformGranularitySpec(Granularities.DAY, Granularities.MINUTE, Collections.singletonList(interval)),
            null),
        new ParallelIndexIOConfig(
            null,
            new SettableSplittableLocalInputSource(inputDir, "*.json", true),
            new JsonInputFormat(new JSONPathSpec(true, null), null, null),
            false, null),
        AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING),
    null);
task.addToContext(Tasks.FORCE_TIME_CHUNK_LOCK_KEY, lockGranularity == LockGranularity.TIME_CHUNK);
Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().runAndWait(task).getStatusCode());
Set<DataSegment> segments = getIndexingServiceClient().getPublishedSegments(task);
for (DataSegment segment : segments) {
Assert.assertEquals(ImmutableList.of("ts", "explicitDim", "implicitDim"), segment.getDimensions());
}
}
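For clarity, this is roughly what one generated input row looks like when serialized the same way the test does it (the values are illustrative): "ts" feeds the timestamp spec, "explicitDim" is declared in the dimensionsSpec, and "implicitDim" is only picked up because setIncludeAllDimensions(true) enables schema discovery, which is why it appears last in each segment's dimension list.
// Sketch only (writeValueAsString throws JsonProcessingException); mirrors the writer.write(...) calls above.
String row = getObjectMapper().writeValueAsString(
    ImmutableMap.of("ts", "2017-12-24", "implicitDim", "implicit_0", "explicitDim", "explicit_0"));
// row = {"ts":"2017-12-24","implicitDim":"implicit_0","explicitDim":"explicit_0"}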
Use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.
The class ITAutoCompactionTest, method testAutoCompactionDutyWithRollup:
@Test
public void testAutoCompactionDutyWithRollup() throws Exception {
final ISOChronology chrono = ISOChronology.getInstance(DateTimes.inferTzFromString("America/Los_Angeles"));
Map<String, Object> specs = ImmutableMap.of(
    "%%GRANULARITYSPEC%%",
    new UniformGranularitySpec(Granularities.DAY, Granularities.DAY, false,
        ImmutableList.of(new Interval("2013-08-31/2013-09-02", chrono))));
loadData(INDEX_TASK_WITH_GRANULARITY_SPEC, specs);
try (final Closeable ignored = unloader(fullDatasourceName)) {
Map<String, Object> expectedResult = ImmutableMap.of(
    "%%FIELD_TO_QUERY%%", "added",
    "%%EXPECTED_COUNT_RESULT%%", 2,
    "%%EXPECTED_SCAN_RESULT%%",
    ImmutableList.of(ImmutableMap.of("events", ImmutableList.of(ImmutableList.of(57.0), ImmutableList.of(459.0)))));
verifyQuery(INDEX_ROLLUP_QUERIES_RESOURCE, expectedResult);
submitCompactionConfig(MAX_ROWS_PER_SEGMENT_COMPACTED, NO_SKIP_OFFSET, new UserCompactionTaskGranularityConfig(null, null, true), false);
forceTriggerAutoCompaction(2);
expectedResult = ImmutableMap.of(
    "%%FIELD_TO_QUERY%%", "added",
    "%%EXPECTED_COUNT_RESULT%%", 1,
    "%%EXPECTED_SCAN_RESULT%%",
    ImmutableList.of(ImmutableMap.of("events", ImmutableList.of(ImmutableList.of(516.0)))));
verifyQuery(INDEX_ROLLUP_QUERIES_RESOURCE, expectedResult);
verifySegmentsCompacted(2, MAX_ROWS_PER_SEGMENT_COMPACTED);
List<TaskResponseObject> compactTasksBefore = indexer.getCompleteTasksForDataSource(fullDatasourceName);
// Verify rollup segments do not get compacted again
forceTriggerAutoCompaction(2);
List<TaskResponseObject> compactTasksAfter = indexer.getCompleteTasksForDataSource(fullDatasourceName);
Assert.assertEquals(compactTasksAfter.size(), compactTasksBefore.size());
}
}
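To make the rollup behaviour easier to follow, here is a minimal sketch of the two configurations involved, with values mirroring the test: ingestion runs with rollup disabled, which is why the scan initially returns two raw rows (57.0 and 459.0), and auto-compaction then rewrites the interval with rollup forced on, collapsing them into the single rolled-up row (516.0). The three-argument UserCompactionTaskGranularityConfig constructor is assumed to take segmentGranularity, queryGranularity and rollup, matching the call above.
ISOChronology la = ISOChronology.getInstance(DateTimes.inferTzFromString("America/Los_Angeles"));
// Ingestion-time spec: rollup explicitly disabled.
GranularitySpec ingestSpec = new UniformGranularitySpec(
    Granularities.DAY, Granularities.DAY, false,
    ImmutableList.of(new Interval("2013-08-31/2013-09-02", la)));
// Compaction override: leave both granularities untouched, force rollup on.
UserCompactionTaskGranularityConfig rollupOverride = new UserCompactionTaskGranularityConfig(null, null, true);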