Search in sources :

Example 6 with UniformGranularitySpec

use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

the class BatchDeltaIngestionTest method makeHadoopDruidIndexerConfig.

private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig(Map<String, Object> inputSpec, File tmpDir, AggregatorFactory[] aggregators) throws Exception {
    HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(new HadoopIngestionSpec(new DataSchema("website", MAPPER.convertValue(new StringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host"))), null, ImmutableList.of("timestamp", "host", "host2", "visited_num"), false, 0), null), Map.class), aggregators != null ? aggregators : new AggregatorFactory[] { new LongSumAggregatorFactory("visited_sum", "visited_num"), new HyperUniquesAggregatorFactory("unique_hosts", "host2") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)), null, MAPPER), new HadoopIOConfig(inputSpec, null, tmpDir.getCanonicalPath()), new HadoopTuningConfig(tmpDir.getCanonicalPath(), null, null, null, null, null, null, null, null, false, false, false, false, null, false, false, null, null, false, false, null, null, null, null, null)));
    config.setShardSpecs(ImmutableMap.of(INTERVAL_FULL.getStartMillis(), ImmutableList.of(new HadoopyShardSpec(new HashBasedNumberedShardSpec(0, 1, 0, 1, null, HashPartitionFunction.MURMUR3_32_ABS, HadoopDruidIndexerConfig.JSON_MAPPER), 0))));
    config = HadoopDruidIndexerConfig.fromSpec(config.getSchema());
    return config;
}
Also used : HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 7 with UniformGranularitySpec

use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

the class HadoopIngestionSpecUpdateDatasourcePathSpecSegmentsTest method testRunUpdateSegmentListIfDatasourcePathSpecIsUsed.

private HadoopDruidIndexerConfig testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(PathSpec datasourcePathSpec, @Nullable Interval jobInterval) throws Exception {
    HadoopIngestionSpec spec = new HadoopIngestionSpec(new DataSchema("foo", null, new AggregatorFactory[0], new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(Intervals.of("2010-01-01/P1D"))), null, jsonMapper), new HadoopIOConfig(jsonMapper.convertValue(datasourcePathSpec, Map.class), null, null), null);
    spec = jsonMapper.readValue(jsonMapper.writeValueAsString(spec), HadoopIngestionSpec.class);
    UsedSegmentsRetriever segmentsRetriever = EasyMock.createMock(UsedSegmentsRetriever.class);
    EasyMock.expect(segmentsRetriever.retrieveUsedSegmentsForIntervals(TEST_DATA_SOURCE, Collections.singletonList(jobInterval != null ? jobInterval.overlap(TEST_DATA_SOURCE_INTERVAL) : null), Segments.ONLY_VISIBLE)).andReturn(ImmutableList.of(SEGMENT));
    EasyMock.expect(segmentsRetriever.retrieveUsedSegmentsForIntervals(TEST_DATA_SOURCE2, Collections.singletonList(jobInterval != null ? jobInterval.overlap(TEST_DATA_SOURCE_INTERVAL2) : null), Segments.ONLY_VISIBLE)).andReturn(ImmutableList.of(SEGMENT2));
    EasyMock.replay(segmentsRetriever);
    HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(spec, jsonMapper, segmentsRetriever);
    return HadoopDruidIndexerConfig.fromString(jsonMapper.writeValueAsString(spec));
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) UsedSegmentsRetriever(org.apache.druid.indexer.path.UsedSegmentsRetriever) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory)

Example 8 with UniformGranularitySpec

use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

the class GranularUnprocessedPathSpec method addInputPaths.

@Override
public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException {
    // This PathSpec breaks so many abstractions that we might as break some more
    Preconditions.checkState(config.getGranularitySpec() instanceof UniformGranularitySpec, StringUtils.format("Cannot use %s without %s", GranularUnprocessedPathSpec.class.getSimpleName(), UniformGranularitySpec.class.getSimpleName()));
    final Path betaInput = new Path(getInputPath());
    final FileSystem fs = betaInput.getFileSystem(job.getConfiguration());
    final Granularity segmentGranularity = config.getGranularitySpec().getSegmentGranularity();
    Map<Long, Long> inputModifiedTimes = new TreeMap<>(Ordering.natural().reverse());
    for (FileStatus status : FSSpideringIterator.spiderIterable(fs, betaInput)) {
        final DateTime key = segmentGranularity.toDate(status.getPath().toString());
        final Long currVal = inputModifiedTimes.get(key.getMillis());
        final long mTime = status.getModificationTime();
        inputModifiedTimes.put(key.getMillis(), currVal == null ? mTime : Math.max(currVal, mTime));
    }
    Set<Interval> bucketsToRun = new TreeSet<>(Comparators.intervals());
    for (Map.Entry<Long, Long> entry : inputModifiedTimes.entrySet()) {
        DateTime timeBucket = DateTimes.utc(entry.getKey());
        long mTime = entry.getValue();
        String bucketOutput = StringUtils.format("%s/%s", config.getSchema().getIOConfig().getSegmentOutputPath(), segmentGranularity.toPath(timeBucket));
        for (FileStatus fileStatus : FSSpideringIterator.spiderIterable(fs, new Path(bucketOutput))) {
            if (fileStatus.getModificationTime() > mTime) {
                bucketsToRun.add(new Interval(timeBucket, segmentGranularity.increment(timeBucket)));
                break;
            }
        }
        if (bucketsToRun.size() >= maxBuckets) {
            break;
        }
    }
    config.setGranularitySpec(new UniformGranularitySpec(segmentGranularity, config.getGranularitySpec().getQueryGranularity(), config.getGranularitySpec().isRollup(), Lists.newArrayList(bucketsToRun)));
    return super.addInputPaths(config, job);
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Granularity(org.apache.druid.java.util.common.granularity.Granularity) TreeMap(java.util.TreeMap) DateTime(org.joda.time.DateTime) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) TreeSet(java.util.TreeSet) FileSystem(org.apache.hadoop.fs.FileSystem) TreeMap(java.util.TreeMap) Map(java.util.Map) Interval(org.joda.time.Interval)

Example 9 with UniformGranularitySpec

use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

the class HadoopIngestionSpecTest method testPeriodSegmentGranularitySpec.

@Test
public void testPeriodSegmentGranularitySpec() {
    final HadoopIngestionSpec schema;
    try {
        schema = jsonReadWriteRead("{\n" + "    \"dataSchema\": {\n" + "     \"dataSource\": \"foo\",\n" + "     \"metricsSpec\": [],\n" + "        \"granularitySpec\": {\n" + "                \"type\": \"uniform\",\n" + "                \"segmentGranularity\": {\"type\": \"period\", \"period\":\"PT1H\", \"timeZone\":\"America/Los_Angeles\"},\n" + "                \"intervals\": [\"2012-01-01/P1D\"]\n" + "        }\n" + "    }\n" + "}", HadoopIngestionSpec.class);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    final UniformGranularitySpec granularitySpec = (UniformGranularitySpec) schema.getDataSchema().getGranularitySpec();
    Assert.assertEquals("getSegmentGranularity", new PeriodGranularity(new Period("PT1H"), null, DateTimes.inferTzFromString("America/Los_Angeles")), granularitySpec.getSegmentGranularity());
}
Also used : UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) Period(org.joda.time.Period) Test(org.junit.Test)

Example 10 with UniformGranularitySpec

use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

the class IndexGeneratorCombinerTest method setUp.

@Before
public void setUp() throws Exception {
    HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(new HadoopIngestionSpec(new DataSchema("website", HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(new StringInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec("timestamp", "yyyyMMddHH", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host", "keywords")))), null), Map.class), new AggregatorFactory[] { new LongSumAggregatorFactory("visited_sum", "visited"), new HyperUniquesAggregatorFactory("unique_hosts", "host") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(Intervals.of("2010/2011"))), null, HadoopDruidIndexerConfig.JSON_MAPPER), new HadoopIOConfig(ImmutableMap.of("paths", "/tmp/dummy", "type", "static"), null, "/tmp/dummy"), HadoopTuningConfig.makeDefaultTuningConfig().withWorkingPath("/tmp/work").withVersion("ver")));
    Configuration hadoopConfig = new Configuration();
    hadoopConfig.set(HadoopDruidIndexerConfig.CONFIG_PROPERTY, HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(config));
    Reducer.Context context = EasyMock.createMock(Reducer.Context.class);
    EasyMock.expect(context.getConfiguration()).andReturn(hadoopConfig);
    EasyMock.replay(context);
    aggregators = config.getSchema().getDataSchema().getAggregators();
    combiner = new IndexGeneratorJob.IndexGeneratorCombiner();
    combiner.setup(context);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DataSchema(org.apache.druid.segment.indexing.DataSchema) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Reducer(org.apache.hadoop.mapreduce.Reducer) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Before(org.junit.Before)

Aggregations

UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)91 Test (org.junit.Test)60 DataSchema (org.apache.druid.segment.indexing.DataSchema)49 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)36 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)35 DataSegment (org.apache.druid.timeline.DataSegment)33 File (java.io.File)25 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)24 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)22 Map (java.util.Map)20 Interval (org.joda.time.Interval)18 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)17 ArrayList (java.util.ArrayList)15 HashMap (java.util.HashMap)14 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)14 ImmutableMap (com.google.common.collect.ImmutableMap)12 Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder)12 GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec)12 CompactionState (org.apache.druid.timeline.CompactionState)12 HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec)12