
Example 21 with UniformGranularitySpec

Use of io.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

From class TaskLifecycleTest, method testResumeTasks:

@Test
public void testResumeTasks() throws Exception {
    final Task indexTask = new IndexTask(
        null,
        null,
        new IndexTask.IndexIngestionSpec(
            new DataSchema(
                "foo",
                null,
                new AggregatorFactory[] { new DoubleSumAggregatorFactory("met", "met") },
                new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(new Interval("2010-01-01/P2D"))),
                mapper),
            new IndexTask.IndexIOConfig(new MockFirehoseFactory(false), false, null),
            new IndexTask.IndexTuningConfig(10000, 10, null, null, indexSpec, null, false, null, null)),
        null,
        MAPPER);
    final long startTime = System.currentTimeMillis();
    // manually insert the task into TaskStorage, waiting for TaskQueue to sync from storage
    taskQueue.start();
    taskStorage.insert(indexTask, TaskStatus.running(indexTask.getId()));
    while (tsqa.getStatus(indexTask.getId()).get().isRunnable()) {
        if (System.currentTimeMillis() > startTime + 10 * 1000) {
            throw new ISE("Where did the task go?!: %s", indexTask.getId());
        }
        Thread.sleep(100);
    }
    final TaskStatus status = taskStorage.getStatus(indexTask.getId()).get();
    final List<DataSegment> publishedSegments = byIntervalOrdering.sortedCopy(mdc.getPublished());
    final List<DataSegment> loggedSegments = byIntervalOrdering.sortedCopy(tsqa.getInsertedSegments(indexTask.getId()));
    Assert.assertEquals("statusCode", TaskStatus.Status.SUCCESS, status.getStatusCode());
    Assert.assertEquals("segments logged vs published", loggedSegments, publishedSegments);
    Assert.assertEquals("num segments published", 2, mdc.getPublished().size());
    Assert.assertEquals("num segments nuked", 0, mdc.getNuked().size());
    Assert.assertEquals("segment1 datasource", "foo", publishedSegments.get(0).getDataSource());
    Assert.assertEquals("segment1 interval", new Interval("2010-01-01/P1D"), publishedSegments.get(0).getInterval());
    Assert.assertEquals("segment1 dimensions", ImmutableList.of("dim1", "dim2"), publishedSegments.get(0).getDimensions());
    Assert.assertEquals("segment1 metrics", ImmutableList.of("met"), publishedSegments.get(0).getMetrics());
    Assert.assertEquals("segment2 datasource", "foo", publishedSegments.get(1).getDataSource());
    Assert.assertEquals("segment2 interval", new Interval("2010-01-02/P1D"), publishedSegments.get(1).getInterval());
    Assert.assertEquals("segment2 dimensions", ImmutableList.of("dim1", "dim2"), publishedSegments.get(1).getDimensions());
    Assert.assertEquals("segment2 metrics", ImmutableList.of("met"), publishedSegments.get(1).getMetrics());
}
Also used: IndexTask(io.druid.indexing.common.task.IndexTask) RealtimeIndexTask(io.druid.indexing.common.task.RealtimeIndexTask) Task(io.druid.indexing.common.task.Task) AbstractFixedIntervalTask(io.druid.indexing.common.task.AbstractFixedIntervalTask) KillTask(io.druid.indexing.common.task.KillTask) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) TaskStatus(io.druid.indexing.common.TaskStatus) DataSegment(io.druid.timeline.DataSegment) DataSchema(io.druid.segment.indexing.DataSchema) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) ISE(io.druid.java.util.common.ISE) Interval(org.joda.time.Interval) FireDepartmentTest(io.druid.segment.realtime.FireDepartmentTest) Test(org.junit.Test)
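
A note on why this test expects exactly two published segments: the ingestion interval 2010-01-01/P2D combined with DAY segment granularity yields one segment per day. A minimal sketch of that day bucketing in plain joda-time (illustrative only, not Druid's GranularitySpec code):

import java.util.ArrayList;
import java.util.List;

import org.joda.time.DateTime;
import org.joda.time.Interval;

// Sketch only: split a multi-day interval into day-sized buckets, the
// way DAY segment granularity splits 2010-01-01/P2D into two segments.
public class DayBucketSketch {
    public static void main(String[] args) {
        Interval input = new Interval("2010-01-01/P2D");
        List<Interval> buckets = new ArrayList<>();
        DateTime cursor = input.getStart();
        while (cursor.isBefore(input.getEnd())) {
            buckets.add(new Interval(cursor, cursor.plusDays(1)));
            cursor = cursor.plusDays(1);
        }
        System.out.println(buckets.size()); // 2, matching "num segments published"
    }
}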

Example 22 with UniformGranularitySpec

Use of io.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

From class DetermineHashedPartitionsJob, method run:

public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */
        long startTime = System.currentTimeMillis();
        final Job groupByJob = Job.getInstance(new Configuration(), String.format("%s-determine_partitions_hashed-%s", config.getDataSource(), config.getIntervals()));
        JobHelper.injectSystemProperties(groupByJob);
        config.addJobProperties(groupByJob);
        groupByJob.setMapperClass(DetermineCardinalityMapper.class);
        groupByJob.setMapOutputKeyClass(LongWritable.class);
        groupByJob.setMapOutputValueClass(BytesWritable.class);
        groupByJob.setReducerClass(DetermineCardinalityReducer.class);
        groupByJob.setOutputKeyClass(NullWritable.class);
        groupByJob.setOutputValueClass(NullWritable.class);
        groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        groupByJob.setPartitionerClass(DetermineHashedPartitionsPartitioner.class);
        if (!config.getSegmentGranularIntervals().isPresent()) {
            groupByJob.setNumReduceTasks(1);
        } else {
            groupByJob.setNumReduceTasks(config.getSegmentGranularIntervals().get().size());
        }
        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()), JobHelper.distributedClassPath(config.makeIntermediatePath()), groupByJob);
        config.addInputPaths(groupByJob);
        config.intoConfiguration(groupByJob);
        FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());
        groupByJob.submit();
        log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(), groupByJob.getTrackingURL());
        if (!groupByJob.waitForCompletion(true)) {
            log.error("Job failed: %s", groupByJob.getJobID());
            return false;
        }
        /*
         * Load partitions and intervals determined by the previous job.
         */
        log.info("Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        if (!config.getSegmentGranularIntervals().isPresent()) {
            final Path intervalInfoPath = config.makeIntervalInfoPath();
            fileSystem = intervalInfoPath.getFileSystem(groupByJob.getConfiguration());
            if (!Utils.exists(groupByJob, fileSystem, intervalInfoPath)) {
                throw new ISE("Path[%s] didn't exist!?", intervalInfoPath);
            }
            List<Interval> intervals = config.JSON_MAPPER.readValue(Utils.openInputStream(groupByJob, intervalInfoPath), new TypeReference<List<Interval>>() {
            });
            config.setGranularitySpec(new UniformGranularitySpec(
                config.getGranularitySpec().getSegmentGranularity(),
                config.getGranularitySpec().getQueryGranularity(),
                config.getGranularitySpec().isRollup(),
                intervals));
            log.info("Determined Intervals for Job [%s].", config.getSegmentGranularIntervals());
        }
        Map<Long, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
            DateTime bucket = segmentGranularity.getStart();
            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(groupByJob.getConfiguration());
            }
            if (Utils.exists(groupByJob, fileSystem, partitionInfoPath)) {
                final Long numRows = config.JSON_MAPPER.readValue(Utils.openInputStream(groupByJob, partitionInfoPath), new TypeReference<Long>() {
                });
                log.info("Found approximately [%,d] rows in data.", numRows);
                final int numberOfShards = (int) Math.ceil((double) numRows / config.getTargetPartitionSize());
                log.info("Creating [%,d] shards", numberOfShards);
                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(numberOfShards);
                if (numberOfShards == 1) {
                    actualSpecs.add(new HadoopyShardSpec(NoneShardSpec.instance(), shardCount++));
                } else {
                    for (int i = 0; i < numberOfShards; ++i) {
                        actualSpecs.add(new HadoopyShardSpec(new HashBasedNumberedShardSpec(i, numberOfShards, null, HadoopDruidIndexerConfig.JSON_MAPPER), shardCount++));
                        log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                    }
                }
                shardSpecs.put(bucket.getMillis(), actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);
        log.info("DetermineHashedPartitionsJob took %d millis", (System.currentTimeMillis() - startTime));
        return true;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
Also used: Path(org.apache.hadoop.fs.Path) HashBasedNumberedShardSpec(io.druid.timeline.partition.HashBasedNumberedShardSpec) Configuration(org.apache.hadoop.conf.Configuration) DateTime(org.joda.time.DateTime) IOException(java.io.IOException) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) FileSystem(org.apache.hadoop.fs.FileSystem) ISE(io.druid.java.util.common.ISE) List(java.util.List) Job(org.apache.hadoop.mapreduce.Job) Interval(org.joda.time.Interval)
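
The shard count in the job above comes from a single ceiling division: numberOfShards = ceil(numRows / targetPartitionSize). A small self-contained sketch of that arithmetic, using hypothetical row counts rather than values from any real job:

// Sketch of the shard-count arithmetic in DetermineHashedPartitionsJob.
public class ShardCountSketch {
    static int numberOfShards(long numRows, long targetPartitionSize) {
        return (int) Math.ceil((double) numRows / targetPartitionSize);
    }

    public static void main(String[] args) {
        // Hypothetical values: 1,000,001 rows at a 500,000-row target
        // needs 3 shards; an exact multiple needs only 2.
        System.out.println(numberOfShards(1_000_001L, 500_000L)); // 3
        System.out.println(numberOfShards(1_000_000L, 500_000L)); // 2
    }
}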

Example 23 with UniformGranularitySpec

Use of io.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

From class RealtimeIndexTaskTest, method makeRealtimeTask:

private RealtimeIndexTask makeRealtimeTask(final String taskId, boolean reportParseExceptions, long handoffTimeout) {
    ObjectMapper objectMapper = new DefaultObjectMapper();
    DataSchema dataSchema = new DataSchema(
        "test_ds",
        null,
        new AggregatorFactory[] { new CountAggregatorFactory("rows"), new LongSumAggregatorFactory("met1", "met1") },
        new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null),
        objectMapper);
    RealtimeIOConfig realtimeIOConfig = new RealtimeIOConfig(new TestFirehoseFactory(), null, null);
    RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(
        1000,
        new Period("P1Y"),
        new Period("PT10M"),
        null,
        null,
        new ServerTimeRejectionPolicyFactory(),
        null,
        null,
        null,
        buildV9Directly,
        0,
        0,
        reportParseExceptions,
        handoffTimeout);
    return new RealtimeIndexTask(taskId, null, new FireDepartment(dataSchema, realtimeIOConfig, realtimeTuningConfig), null) {

        @Override
        protected boolean isFirehoseDrainableByClosing(FirehoseFactory firehoseFactory) {
            return true;
        }
    };
}
Also used: RealtimeIOConfig(io.druid.segment.indexing.RealtimeIOConfig) FirehoseFactory(io.druid.data.input.FirehoseFactory) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) Period(org.joda.time.Period) RealtimeTuningConfig(io.druid.segment.indexing.RealtimeTuningConfig) DataSchema(io.druid.segment.indexing.DataSchema) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) FireDepartment(io.druid.segment.realtime.FireDepartment) ServerTimeRejectionPolicyFactory(io.druid.segment.realtime.plumber.ServerTimeRejectionPolicyFactory) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)
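
The examples in this set use two UniformGranularitySpec constructor shapes: a three-argument form (segment granularity, query granularity, input intervals), as in makeRealtimeTask above, and a four-argument form that adds an explicit rollup flag, as in DetermineHashedPartitionsJob. A sketch contrasting the two call shapes; the import paths for Granularities and GranularitySpec are my assumptions for this Druid version, inferred from the usages above:

import com.google.common.collect.ImmutableList;

import org.joda.time.Interval;

// Import paths assumed from the call sites in these examples.
import io.druid.java.util.common.granularity.Granularities;
import io.druid.segment.indexing.granularity.GranularitySpec;
import io.druid.segment.indexing.granularity.UniformGranularitySpec;

public class GranularitySpecForms {
    public static void main(String[] args) {
        // Three-argument form, as in makeRealtimeTask: passing null
        // intervals declares no intervals up front.
        GranularitySpec realtimeSpec = new UniformGranularitySpec(
            Granularities.DAY,   // segment granularity
            Granularities.NONE,  // query granularity
            null);               // input intervals

        // Four-argument form, as in DetermineHashedPartitionsJob,
        // with the rollup flag made explicit.
        GranularitySpec batchSpec = new UniformGranularitySpec(
            Granularities.DAY,
            Granularities.NONE,
            true,                // rollup
            ImmutableList.of(new Interval("2010-01-01/P2D")));

        System.out.println(realtimeSpec.getSegmentGranularity());
        System.out.println(batchSpec.isRollup());
    }
}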

Example 24 with UniformGranularitySpec

Use of io.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

From class IndexTaskTest, method testIntervalBucketing:

@Test
public void testIntervalBucketing() throws Exception {
    File tmpDir = temporaryFolder.newFolder();
    File tmpFile = File.createTempFile("druid", "index", tmpDir);
    PrintWriter writer = new PrintWriter(tmpFile);
    writer.println("2015-03-01T07:59:59.977Z,a,1");
    writer.println("2015-03-01T08:00:00.000Z,b,1");
    writer.close();
    IndexTask indexTask = new IndexTask(
        null,
        null,
        createIngestionSpec(
            tmpDir,
            new UniformGranularitySpec(
                Granularities.HOUR,
                Granularities.HOUR,
                Arrays.asList(new Interval("2015-03-01T08:00:00Z/2015-03-01T09:00:00Z"))),
            50, null, false, false),
        null,
        jsonMapper);
    final List<DataSegment> segments = runTask(indexTask);
    Assert.assertEquals(1, segments.size());
}
Also used: UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) File(java.io.File) DataSegment(io.druid.timeline.DataSegment) PrintWriter(java.io.PrintWriter) Interval(org.joda.time.Interval) Test(org.junit.Test)
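
Why the test expects one segment rather than two: joda-time intervals are half-open, [start, end), so the 07:59:59.977Z row falls outside the declared 08:00-09:00 bucket and is dropped, while the 08:00:00.000Z row lands exactly on the bucket's start. A minimal sketch in plain joda-time (not Druid's bucketing code):

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;

public class IntervalBucketingSketch {
    public static void main(String[] args) {
        Interval bucket = new Interval("2015-03-01T08:00:00Z/2015-03-01T09:00:00Z");
        DateTime early = new DateTime("2015-03-01T07:59:59.977Z", DateTimeZone.UTC);
        DateTime onBoundary = new DateTime("2015-03-01T08:00:00.000Z", DateTimeZone.UTC);
        System.out.println(bucket.contains(early));      // false: row is dropped
        System.out.println(bucket.contains(onBoundary)); // true: row is ingested
    }
}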

Example 25 with UniformGranularitySpec

Use of io.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

From class IndexTaskTest, method testIntervalNotSpecified:

@Test
public void testIntervalNotSpecified() throws Exception {
    File tmpDir = temporaryFolder.newFolder();
    File tmpFile = File.createTempFile("druid", "index", tmpDir);
    PrintWriter writer = new PrintWriter(tmpFile);
    writer.println("2014-01-01T00:00:10Z,a,1");
    writer.println("2014-01-01T01:00:20Z,b,1");
    writer.println("2014-01-01T02:00:30Z,c,1");
    writer.close();
    IndexTask indexTask = new IndexTask(
        null,
        null,
        createIngestionSpec(
            tmpDir,
            new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
            2, null, false, false),
        null,
        jsonMapper);
    final List<DataSegment> segments = runTask(indexTask);
    Assert.assertEquals(3, segments.size());
    Assert.assertEquals("test", segments.get(0).getDataSource());
    Assert.assertEquals(new Interval("2014-01-01T00/PT1H"), segments.get(0).getInterval());
    Assert.assertTrue(segments.get(0).getShardSpec().getClass().equals(NoneShardSpec.class));
    Assert.assertEquals(0, segments.get(0).getShardSpec().getPartitionNum());
    Assert.assertEquals("test", segments.get(1).getDataSource());
    Assert.assertEquals(new Interval("2014-01-01T01/PT1H"), segments.get(1).getInterval());
    Assert.assertTrue(segments.get(1).getShardSpec().getClass().equals(NoneShardSpec.class));
    Assert.assertEquals(0, segments.get(1).getShardSpec().getPartitionNum());
    Assert.assertEquals("test", segments.get(2).getDataSource());
    Assert.assertEquals(new Interval("2014-01-01T02/PT1H"), segments.get(2).getInterval());
    Assert.assertTrue(segments.get(2).getShardSpec().getClass().equals(NoneShardSpec.class));
    Assert.assertEquals(0, segments.get(2).getShardSpec().getPartitionNum());
}
Also used: UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) NoneShardSpec(io.druid.timeline.partition.NoneShardSpec) File(java.io.File) DataSegment(io.druid.timeline.DataSegment) PrintWriter(java.io.PrintWriter) Interval(org.joda.time.Interval) Test(org.junit.Test)
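
With no intervals specified, each row's timestamp determines its own hourly bucket, so three rows in three different hours produce three segments. A sketch of that derivation in plain joda-time (again illustrative, not Druid's code):

import java.util.LinkedHashSet;
import java.util.Set;

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;

public class HourBucketSketch {
    public static void main(String[] args) {
        String[] timestamps = {
            "2014-01-01T00:00:10Z", "2014-01-01T01:00:20Z", "2014-01-01T02:00:30Z"
        };
        Set<Interval> buckets = new LinkedHashSet<>();
        for (String ts : timestamps) {
            // Truncate to the containing hour, then form [hour, hour + 1h).
            DateTime hourStart = new DateTime(ts, DateTimeZone.UTC).hourOfDay().roundFloorCopy();
            buckets.add(new Interval(hourStart, hourStart.plusHours(1)));
        }
        System.out.println(buckets.size()); // 3, matching the test's assertion
    }
}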

Aggregations

UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec): 33 usages
DataSchema (io.druid.segment.indexing.DataSchema): 27 usages
Interval (org.joda.time.Interval): 21 usages
Test (org.junit.Test): 19 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 9 usages
File (java.io.File): 9 usages
DimensionsSpec (io.druid.data.input.impl.DimensionsSpec): 8 usages
TimestampSpec (io.druid.data.input.impl.TimestampSpec): 8 usages
RealtimeTuningConfig (io.druid.segment.indexing.RealtimeTuningConfig): 8 usages
StringInputRowParser (io.druid.data.input.impl.StringInputRowParser): 7 usages
DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper): 7 usages
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 7 usages
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 7 usages
Period (org.joda.time.Period): 7 usages
Before (org.junit.Before): 7 usages
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 6 usages
DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory): 6 usages
RealtimeIOConfig (io.druid.segment.indexing.RealtimeIOConfig): 6 usages
DateTime (org.joda.time.DateTime): 6 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 5 usages