use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.
the class GranularityPathSpecTest method testIntervalTrimming.
@Test
public void testIntervalTrimming() throws Exception {
UserGroupInformation.setLoginUser(UserGroupInformation.createUserForTesting("test", new String[] { "testGroup" }));
HadoopIngestionSpec spec = new HadoopIngestionSpec(new DataSchema("foo", null, new AggregatorFactory[0], new UniformGranularitySpec(Granularities.DAY, Granularities.ALL, ImmutableList.of(Intervals.of("2015-01-01T11Z/2015-01-02T05Z"))), null, jsonMapper), new HadoopIOConfig(null, null, null), DEFAULT_TUNING_CONFIG);
granularityPathSpec.setDataGranularity(Granularities.HOUR);
granularityPathSpec.setPathFormat("yyyy/MM/dd/HH");
granularityPathSpec.setFilePattern(".*");
granularityPathSpec.setInputFormat(TextInputFormat.class);
Job job = Job.getInstance();
String formatStr = "file:%s/%s;org.apache.hadoop.mapreduce.lib.input.TextInputFormat";
createFile(testFolder, "test/2015/01/01/00/file1", "test/2015/01/01/10/file2", "test/2015/01/01/18/file3", "test/2015/01/02/00/file1", "test/2015/01/02/03/file2", "test/2015/01/02/05/file3", "test/2015/01/02/07/file4", "test/2015/01/02/09/file5");
granularityPathSpec.setInputPath(testFolder.getRoot().getPath() + "/test");
granularityPathSpec.addInputPaths(HadoopDruidIndexerConfig.fromSpec(spec), job);
String actual = job.getConfiguration().get("mapreduce.input.multipleinputs.dir.formats");
String expected = Joiner.on(",").join(Lists.newArrayList(StringUtils.format(formatStr, testFolder.getRoot(), "test/2015/01/01/18/file3"), StringUtils.format(formatStr, testFolder.getRoot(), "test/2015/01/02/00/file1"), StringUtils.format(formatStr, testFolder.getRoot(), "test/2015/01/02/03/file2")));
Assert.assertEquals("Did not find expected input paths", expected, actual);
}
use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.
the class CompactionTask method createDataSchema.
private static DataSchema createDataSchema(String dataSource, List<NonnullPair<QueryableIndex, DataSegment>> queryableIndexAndSegments, @Nullable DimensionsSpec dimensionsSpec, @Nullable ClientCompactionTaskTransformSpec transformSpec, @Nullable AggregatorFactory[] metricsSpec, @Nonnull ClientCompactionTaskGranularitySpec granularitySpec) {
// check index metadata &
// Decide which values to propagate (i.e. carry over) for rollup & queryGranularity
final SettableSupplier<Boolean> rollup = new SettableSupplier<>();
final SettableSupplier<Granularity> queryGranularity = new SettableSupplier<>();
decideRollupAndQueryGranularityCarryOver(rollup, queryGranularity, queryableIndexAndSegments);
final Interval totalInterval = JodaUtils.umbrellaInterval(queryableIndexAndSegments.stream().map(p -> p.rhs.getInterval()).collect(Collectors.toList()));
final Granularity queryGranularityToUse;
if (granularitySpec.getQueryGranularity() == null) {
queryGranularityToUse = queryGranularity.get();
log.info("Generate compaction task spec with segments original query granularity [%s]", queryGranularityToUse);
} else {
queryGranularityToUse = granularitySpec.getQueryGranularity();
log.info("Generate compaction task spec with new query granularity overrided from input [%s]", queryGranularityToUse);
}
final GranularitySpec uniformGranularitySpec = new UniformGranularitySpec(Preconditions.checkNotNull(granularitySpec.getSegmentGranularity()), queryGranularityToUse, granularitySpec.isRollup() == null ? rollup.get() : granularitySpec.isRollup(), Collections.singletonList(totalInterval));
// find unique dimensions
final DimensionsSpec finalDimensionsSpec = dimensionsSpec == null ? createDimensionsSpec(queryableIndexAndSegments) : dimensionsSpec;
final AggregatorFactory[] finalMetricsSpec = metricsSpec == null ? createMetricsSpec(queryableIndexAndSegments) : metricsSpec;
return new DataSchema(dataSource, new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, "millis", null), finalDimensionsSpec, finalMetricsSpec, uniformGranularitySpec, transformSpec == null ? null : new TransformSpec(transformSpec.getFilter(), null));
}
use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.
the class AppenderatorDriverRealtimeIndexTaskTest method makeRealtimeTask.
private AppenderatorDriverRealtimeIndexTask makeRealtimeTask(final String taskId, final TransformSpec transformSpec, final boolean reportParseExceptions, final long handoffTimeout, final Boolean logParseExceptions, final Integer maxParseExceptions, final Integer maxSavedParseExceptions, final Integer maxRowsPerSegment, final Long maxTotalRows) {
DataSchema dataSchema = new DataSchema("test_ds", TestHelper.makeJsonMapper().convertValue(new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec("t", "auto", null), new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("dim1"), new StringDimensionSchema("dim2"), new StringDimensionSchema("dim1t"), new LongDimensionSchema("dimLong"), new FloatDimensionSchema("dimFloat"))))), JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT), new AggregatorFactory[] { new CountAggregatorFactory("rows"), new LongSumAggregatorFactory("met1", "met1") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), transformSpec, OBJECT_MAPPER);
RealtimeIOConfig realtimeIOConfig = new RealtimeIOConfig(new TestFirehoseFactory(), null);
RealtimeAppenderatorTuningConfig tuningConfig = new RealtimeAppenderatorTuningConfig(null, 1000, null, null, maxRowsPerSegment, maxTotalRows, null, null, null, null, null, null, reportParseExceptions, handoffTimeout, null, null, logParseExceptions, maxParseExceptions, maxSavedParseExceptions);
return new AppenderatorDriverRealtimeIndexTask(taskId, null, new RealtimeAppenderatorIngestionSpec(dataSchema, realtimeIOConfig, tuningConfig), null) {
@Override
protected boolean isFirehoseDrainableByClosing(FirehoseFactory firehoseFactory) {
return true;
}
};
}
use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.
the class CompactionTaskParallelRunTest method testRunParallelWithHashPartitioningMatchCompactionState.
@Test
public void testRunParallelWithHashPartitioningMatchCompactionState() throws Exception {
// Hash partitioning is not supported with segment lock yet
Assume.assumeFalse(lockGranularity == LockGranularity.SEGMENT);
runIndexTask(null, true);
final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
final CompactionTask compactionTask = builder.inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null)).tuningConfig(newTuningConfig(new HashedPartitionsSpec(null, 3, null), 2, true)).build();
final Set<DataSegment> compactedSegments = runTask(compactionTask);
for (DataSegment segment : compactedSegments) {
// Expect compaction state to exist as store compaction state by default
Map<String, String> expectedLongSumMetric = new HashMap<>();
expectedLongSumMetric.put("type", "longSum");
expectedLongSumMetric.put("name", "val");
expectedLongSumMetric.put("fieldName", "val");
expectedLongSumMetric.put("expression", null);
Assert.assertSame(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
CompactionState expectedState = new CompactionState(new HashedPartitionsSpec(null, 3, null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))), ImmutableList.of(expectedLongSumMetric), null, compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()), getObjectMapper().readValue(getObjectMapper().writeValueAsString(new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, true, ImmutableList.of(segment.getInterval()))), Map.class));
Assert.assertEquals(expectedState, segment.getLastCompactionState());
}
}
use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.
the class CompactionTaskParallelRunTest method testRunParallelWithMultiDimensionRangePartitioningWithSingleTask.
@Test
public void testRunParallelWithMultiDimensionRangePartitioningWithSingleTask() throws Exception {
// Range partitioning is not supported with segment lock yet
Assume.assumeFalse(lockGranularity == LockGranularity.SEGMENT);
runIndexTask(null, true);
final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
final CompactionTask compactionTask = builder.inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null)).tuningConfig(newTuningConfig(new DimensionRangePartitionsSpec(7, null, Arrays.asList("dim1", "dim2"), false), 1, true)).build();
final Set<DataSegment> compactedSegments = runTask(compactionTask);
for (DataSegment segment : compactedSegments) {
// Expect compaction state to exist as store compaction state by default
Map<String, String> expectedLongSumMetric = new HashMap<>();
expectedLongSumMetric.put("type", "longSum");
expectedLongSumMetric.put("name", "val");
expectedLongSumMetric.put("fieldName", "val");
expectedLongSumMetric.put("expression", null);
Assert.assertSame(DimensionRangeShardSpec.class, segment.getShardSpec().getClass());
CompactionState expectedState = new CompactionState(new DimensionRangePartitionsSpec(7, null, Arrays.asList("dim1", "dim2"), false), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))), ImmutableList.of(expectedLongSumMetric), null, compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()), getObjectMapper().readValue(getObjectMapper().writeValueAsString(new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, true, ImmutableList.of(segment.getInterval()))), Map.class));
Assert.assertEquals(expectedState, segment.getLastCompactionState());
}
}
Aggregations