Search in sources:

Example 21 with PartitionsSpec

Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.

The class PartialRangeSegmentGenerateTask, method getPartitionDimensions.

private static List<String> getPartitionDimensions(ParallelIndexIngestionSpec ingestionSpec) {
    PartitionsSpec partitionsSpec = ingestionSpec.getTuningConfig().getPartitionsSpec();
    Preconditions.checkArgument(partitionsSpec instanceof DimensionRangePartitionsSpec, "%s or %s partitionsSpec required", DimensionRangePartitionsSpec.NAME, SingleDimensionPartitionsSpec.NAME);
    DimensionRangePartitionsSpec multiDimPartitionsSpec = (DimensionRangePartitionsSpec) partitionsSpec;
    List<String> partitionDimensions = multiDimPartitionsSpec.getPartitionDimensions();
    Preconditions.checkNotNull(partitionDimensions, "partitionDimension required");
    return partitionDimensions;
}
Also used: SingleDimensionPartitionsSpec(org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) DimensionRangePartitionsSpec(org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec)
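
The check above only admits range-based specs. The snippet below is a minimal standalone sketch, not part of the project, showing the same validation in isolation; it assumes SingleDimensionPartitionsSpec extends DimensionRangePartitionsSpec, as the instanceof check above implies, and reuses the constructor arguments shown in Example 24.

import com.google.common.base.Preconditions;
import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec;
import org.apache.druid.indexer.partitions.PartitionsSpec;
import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec;

import java.util.List;

public class PartitionDimensionsSketch {
    // Mirrors getPartitionDimensions(): only range-based partitioning carries explicit partition dimensions.
    static List<String> partitionDimensionsOf(PartitionsSpec partitionsSpec) {
        Preconditions.checkArgument(partitionsSpec instanceof DimensionRangePartitionsSpec, "range-based partitionsSpec required");
        List<String> partitionDimensions = ((DimensionRangePartitionsSpec) partitionsSpec).getPartitionDimensions();
        Preconditions.checkNotNull(partitionDimensions, "partitionDimension required");
        return partitionDimensions;
    }

    public static void main(String[] args) {
        // Constructor arguments as in Example 24: target rows, max rows, partition dimension, assumeGrouped.
        PartitionsSpec spec = new SingleDimensionPartitionsSpec(1, null, "a", true);
        // Prints [a]
        System.out.println(partitionDimensionsOf(spec));
    }
}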

Example 22 with PartitionsSpec

Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.

The class ITPerfectRollupParallelIndexTest, method testIndexData.

@Test(dataProvider = "resources")
public void testIndexData(PartitionsSpec partitionsSpec) throws Exception {
    try (final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
        final Closeable ignored2 = unloader(INDEX_INGEST_SEGMENT_DATASOURCE + config.getExtraDatasourceNameSuffix());
        final Closeable ignored3 = unloader(INDEX_DRUID_INPUT_SOURCE_DATASOURCE + config.getExtraDatasourceNameSuffix())) {
        boolean forceGuaranteedRollup = partitionsSpec.isForceGuaranteedRollupCompatible();
        Assert.assertTrue(forceGuaranteedRollup, "partitionsSpec does not support perfect rollup");
        final Function<String, String> rollupTransform = spec -> {
            try {
                spec = StringUtils.replace(spec, "%%FORCE_GUARANTEED_ROLLUP%%", Boolean.toString(true));
                spec = StringUtils.replace(spec, "%%SEGMENT_AVAIL_TIMEOUT_MILLIS%%", jsonMapper.writeValueAsString("0"));
                return StringUtils.replace(spec, "%%PARTITIONS_SPEC%%", jsonMapper.writeValueAsString(partitionsSpec));
            } catch (JsonProcessingException e) {
                throw new RuntimeException(e);
            }
        };
        doIndexTest(INDEX_DATASOURCE, INDEX_TASK, rollupTransform, INDEX_QUERIES_RESOURCE, false, true, true, new Pair<>(false, false));
        doReindexTest(INDEX_DATASOURCE, INDEX_INGEST_SEGMENT_DATASOURCE, rollupTransform, INDEX_INGEST_SEGMENT_TASK, INDEX_QUERIES_RESOURCE, new Pair<>(false, false));
        // with DruidInputSource instead of IngestSegmentFirehose
        doReindexTest(INDEX_DATASOURCE, INDEX_DRUID_INPUT_SOURCE_DATASOURCE, rollupTransform, INDEX_DRUID_INPUT_SOURCE_TASK, INDEX_QUERIES_RESOURCE, new Pair<>(false, false));
    }
}
Also used: DataProvider(org.testng.annotations.DataProvider) StringUtils(org.apache.druid.java.util.common.StringUtils) DruidTestModuleFactory(org.apache.druid.testing.guice.DruidTestModuleFactory) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) Test(org.testng.annotations.Test) Function(java.util.function.Function) Guice(org.testng.annotations.Guice) Pair(org.apache.druid.java.util.common.Pair) SingleDimensionPartitionsSpec(org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) TestNGGroup(org.apache.druid.tests.TestNGGroup) Assert(org.testng.Assert) Closeable(java.io.Closeable) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) HashPartitionFunction(org.apache.druid.timeline.partition.HashPartitionFunction)
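
The rollupTransform above just splices serialized JSON into a spec template. The following is a minimal sketch of that placeholder-substitution pattern outside the test harness; the template string and the spec values are hypothetical, and the exact serialized form depends on the Druid version.

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.indexer.partitions.HashedPartitionsSpec;
import org.apache.druid.indexer.partitions.PartitionsSpec;
import org.apache.druid.java.util.common.StringUtils;

import java.util.function.Function;

public class SpecTemplateSketch {
    public static void main(String[] args) {
        final ObjectMapper jsonMapper = new ObjectMapper();
        // Constructor arguments as in Example 23: target rows per segment, numShards, partition dimensions.
        final Integer numShards = 3;
        final PartitionsSpec partitionsSpec = new HashedPartitionsSpec(null, numShards, null);
        // Same idea as rollupTransform: replace each %%...%% placeholder with a JSON value.
        final Function<String, String> transform = spec -> {
            try {
                spec = StringUtils.replace(spec, "%%FORCE_GUARANTEED_ROLLUP%%", Boolean.toString(true));
                return StringUtils.replace(spec, "%%PARTITIONS_SPEC%%", jsonMapper.writeValueAsString(partitionsSpec));
            } catch (JsonProcessingException e) {
                throw new RuntimeException(e);
            }
        };
        // Hypothetical template fragment; the real tests load a full ingestion spec from a resource file.
        System.out.println(transform.apply("{\"forceGuaranteedRollup\": %%FORCE_GUARANTEED_ROLLUP%%, \"partitionsSpec\": %%PARTITIONS_SPEC%%}"));
    }
}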

Example 23 with PartitionsSpec

Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.

The class ParallelIndexSupervisorTaskSerdeTest, method forceGuaranteedRollupWithHashPartitionsMissingNumShards.

@Test
public void forceGuaranteedRollupWithHashPartitionsMissingNumShards() {
    Integer numShards = null;
    ParallelIndexSupervisorTask task = new ParallelIndexSupervisorTaskBuilder()
        .ingestionSpec(
            new ParallelIndexIngestionSpecBuilder()
                .forceGuaranteedRollup(true)
                .partitionsSpec(new HashedPartitionsSpec(null, numShards, null))
                .inputIntervals(INTERVALS)
                .build()
        )
        .build();
    PartitionsSpec partitionsSpec = task.getIngestionSchema().getTuningConfig().getPartitionsSpec();
    Assert.assertThat(partitionsSpec, CoreMatchers.instanceOf(HashedPartitionsSpec.class));
}
Also used: HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) SingleDimensionPartitionsSpec(org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) Test(org.junit.Test)

Example 24 with PartitionsSpec

Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.

The class ParallelIndexSupervisorTaskSerdeTest, method forceGuaranteedRollupWithSingleDimPartitionsValid.

@Test
public void forceGuaranteedRollupWithSingleDimPartitionsValid() {
    ParallelIndexSupervisorTask task = new ParallelIndexSupervisorTaskBuilder()
        .ingestionSpec(
            new ParallelIndexIngestionSpecBuilder()
                .forceGuaranteedRollup(true)
                .partitionsSpec(new SingleDimensionPartitionsSpec(1, null, "a", true))
                .inputIntervals(INTERVALS)
                .build()
        )
        .build();
    PartitionsSpec partitionsSpec = task.getIngestionSchema().getTuningConfig().getPartitionsSpec();
    Assert.assertThat(partitionsSpec, CoreMatchers.instanceOf(SingleDimensionPartitionsSpec.class));
}
Also used: PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) SingleDimensionPartitionsSpec(org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) Test(org.junit.Test)
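
Both serde tests only assert the concrete PartitionsSpec subtype that comes back out of the tuning config. The sketch below, which is not from the project, shows what the underlying Jackson serialization of the two specs used above looks like; the exact field set in the output depends on the Druid version, and the constructor calls are copied from Examples 23 and 24.

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.indexer.partitions.HashedPartitionsSpec;
import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec;

public class PartitionsSpecSerdeSketch {
    public static void main(String[] args) throws Exception {
        final ObjectMapper jsonMapper = new ObjectMapper();
        // Same constructor calls as in Examples 23 and 24; numShards deliberately left null.
        final Integer numShards = null;
        System.out.println(jsonMapper.writeValueAsString(new HashedPartitionsSpec(null, numShards, null)));
        System.out.println(jsonMapper.writeValueAsString(new SingleDimensionPartitionsSpec(1, null, "a", true)));
    }
}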

Example 25 with PartitionsSpec

Use of org.apache.druid.indexer.partitions.PartitionsSpec in project druid by druid-io.

The class DetermineHashedPartitionsJob, method run.

@Override
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */
        startTime = System.currentTimeMillis();
        groupByJob = Job.getInstance(new Configuration(), StringUtils.format("%s-determine_partitions_hashed-%s", config.getDataSource(), config.getIntervals()));
        JobHelper.injectSystemProperties(groupByJob.getConfiguration(), config);
        config.addJobProperties(groupByJob);
        groupByJob.setMapperClass(DetermineCardinalityMapper.class);
        groupByJob.setMapOutputKeyClass(LongWritable.class);
        groupByJob.setMapOutputValueClass(BytesWritable.class);
        groupByJob.setReducerClass(DetermineCardinalityReducer.class);
        groupByJob.setOutputKeyClass(NullWritable.class);
        groupByJob.setOutputValueClass(NullWritable.class);
        groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        groupByJob.setPartitionerClass(DetermineHashedPartitionsPartitioner.class);
        if (config.getInputIntervals().isEmpty()) {
            groupByJob.setNumReduceTasks(1);
        } else {
            groupByJob.setNumReduceTasks(Iterators.size(config.getSegmentGranularIntervals().iterator()));
        }
        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()), JobHelper.distributedClassPath(config.makeIntermediatePath()), groupByJob);
        config.addInputPaths(groupByJob);
        config.intoConfiguration(groupByJob);
        FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());
        groupByJob.submit();
        log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(), groupByJob.getTrackingURL());
        // Store the jobId in the file
        if (groupByJob.getJobID() != null) {
            JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), groupByJob.getJobID().toString());
        }
        try {
            if (!groupByJob.waitForCompletion(true)) {
                log.error("Job failed: %s", groupByJob.getJobID());
                failureCause = Utils.getFailureMessage(groupByJob, HadoopDruidIndexerConfig.JSON_MAPPER);
                return false;
            }
        } catch (IOException ioe) {
            if (!Utils.checkAppSuccessForJobIOException(ioe, groupByJob, config.isUseYarnRMJobStatusFallback())) {
                throw ioe;
            }
        }
        /*
         * Load partitions and intervals determined by the previous job.
         */
        log.info("Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        if (config.getInputIntervals().isEmpty()) {
            final Path intervalInfoPath = config.makeIntervalInfoPath();
            fileSystem = intervalInfoPath.getFileSystem(groupByJob.getConfiguration());
            if (!Utils.exists(groupByJob, fileSystem, intervalInfoPath)) {
                throw new ISE("Path[%s] didn't exist!?", intervalInfoPath);
            }
            List<Interval> intervals = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(Utils.openInputStream(groupByJob, intervalInfoPath), new TypeReference<List<Interval>>() {
            });
            config.setGranularitySpec(new UniformGranularitySpec(config.getGranularitySpec().getSegmentGranularity(), config.getGranularitySpec().getQueryGranularity(), config.getGranularitySpec().isRollup(), intervals));
            log.info("Determined Intervals for Job [%s].", config.getSegmentGranularIntervals());
        }
        Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>(DateTimeComparator.getInstance());
        PartitionsSpec partitionsSpec = config.getPartitionsSpec();
        if (!(partitionsSpec instanceof HashedPartitionsSpec)) {
            throw new ISE("%s is expected, but got %s", HashedPartitionsSpec.class.getName(), partitionsSpec.getClass().getName());
        }
        HashPartitionFunction partitionFunction = ((HashedPartitionsSpec) partitionsSpec).getPartitionFunction();
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
            DateTime bucket = segmentGranularity.getStart();
            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(groupByJob.getConfiguration());
            }
            if (Utils.exists(groupByJob, fileSystem, partitionInfoPath)) {
                final Long numRows = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(Utils.openInputStream(groupByJob, partitionInfoPath), Long.class);
                log.info("Found approximately [%,d] rows in data.", numRows);
                final int numberOfShards = (int) Math.ceil((double) numRows / config.getTargetPartitionSize());
                log.info("Creating [%,d] shards", numberOfShards);
                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(numberOfShards);
                for (int i = 0; i < numberOfShards; ++i) {
                    actualSpecs.add(new HadoopyShardSpec(new HashBasedNumberedShardSpec(i, numberOfShards, i, numberOfShards, null, partitionFunction, HadoopDruidIndexerConfig.JSON_MAPPER), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                }
                shardSpecs.put(bucket.getMillis(), actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);
        log.info("DetermineHashedPartitionsJob took %d millis", (System.currentTimeMillis() - startTime));
        return true;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used: Path(org.apache.hadoop.fs.Path) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) Configuration(org.apache.hadoop.conf.Configuration) IOException(java.io.IOException) TreeMap(java.util.TreeMap) DateTime(org.joda.time.DateTime) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) HashPartitionFunction(org.apache.druid.timeline.partition.HashPartitionFunction) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) FileSystem(org.apache.hadoop.fs.FileSystem) ISE(org.apache.druid.java.util.common.ISE) ArrayList(java.util.ArrayList) List(java.util.List) Interval(org.joda.time.Interval)
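
The shard count per interval in the job above is simply the approximate row count divided by the configured target partition size, rounded up. A small standalone sketch of that arithmetic, using hypothetical numbers:

public class ShardCountSketch {
    public static void main(String[] args) {
        // Hypothetical values: roughly 10.5M rows in one interval, target of 5M rows per segment.
        final long numRows = 10_500_000L;
        final int targetPartitionSize = 5_000_000;
        // Same formula as in DetermineHashedPartitionsJob.run(): round up so no shard exceeds the target.
        final int numberOfShards = (int) Math.ceil((double) numRows / targetPartitionSize);
        // Prints 3
        System.out.println(numberOfShards);
    }
}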

Aggregations

PartitionsSpec (org.apache.druid.indexer.partitions.PartitionsSpec) 34
Test (org.junit.Test) 19
Map (java.util.Map) 17
ArrayList (java.util.ArrayList) 16
DataSegment (org.apache.druid.timeline.DataSegment) 16
Period (org.joda.time.Period) 16
ImmutableMap (com.google.common.collect.ImmutableMap) 15
HashedPartitionsSpec (org.apache.druid.indexer.partitions.HashedPartitionsSpec) 15
IndexSpec (org.apache.druid.segment.IndexSpec) 15
CompactionState (org.apache.druid.timeline.CompactionState) 14
DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec) 11
UserCompactionTaskGranularityConfig (org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig) 11
SingleDimensionPartitionsSpec (org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) 10
StringUtils (org.apache.druid.java.util.common.StringUtils) 9
Function (java.util.function.Function) 8
IOException (java.io.IOException) 7
List (java.util.List) 7
Pair (org.apache.druid.java.util.common.Pair) 5
Interval (org.joda.time.Interval) 5
Test (org.testng.annotations.Test) 5