Example 6 with HashBasedNumberedShardSpec

use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.

the class BatchDeltaIngestionTest method makeHadoopDruidIndexerConfig.

private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig(Map<String, Object> inputSpec, File tmpDir, AggregatorFactory[] aggregators) throws Exception {
    HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
        new HadoopIngestionSpec(
            new DataSchema(
                "website",
                MAPPER.convertValue(
                    new StringInputRowParser(
                        new CSVParseSpec(
                            new TimestampSpec("timestamp", "yyyyMMddHH", null),
                            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host"))),
                            null,
                            ImmutableList.of("timestamp", "host", "host2", "visited_num"),
                            false,
                            0
                        ),
                        null
                    ),
                    Map.class
                ),
                aggregators != null
                    ? aggregators
                    : new AggregatorFactory[] {
                        new LongSumAggregatorFactory("visited_sum", "visited_num"),
                        new HyperUniquesAggregatorFactory("unique_hosts", "host2")
                    },
                new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
                null,
                MAPPER
            ),
            new HadoopIOConfig(inputSpec, null, tmpDir.getCanonicalPath()),
            new HadoopTuningConfig(
                tmpDir.getCanonicalPath(),
                null, null, null, null, null, null, null, null,
                false, false, false, false,
                null,
                false, false,
                null, null,
                false, false,
                null, null, null, null, null
            )
        )
    );
    config.setShardSpecs(
        ImmutableMap.of(
            INTERVAL_FULL.getStartMillis(),
            ImmutableList.of(
                new HadoopyShardSpec(
                    new HashBasedNumberedShardSpec(0, 1, 0, 1, null, HashPartitionFunction.MURMUR3_32_ABS, HadoopDruidIndexerConfig.JSON_MAPPER),
                    0
                )
            )
        )
    );
    config = HadoopDruidIndexerConfig.fromSpec(config.getSchema());
    return config;
}
Also used : HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)
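
For readers decoding the long constructor call above, here is the same HashBasedNumberedShardSpec construction with each positional argument labeled. The comments are a reading of this snippet and of the accessors exercised in the other examples (getNumCorePartitions, getNumBuckets, getPartitionDimensions), not authoritative API documentation:

HashBasedNumberedShardSpec shardSpec = new HashBasedNumberedShardSpec(
    0,                                    // partitionNum: id of this partition within the interval
    1,                                    // core partition count, reported by getNumCorePartitions()
    0,                                    // bucketId: the hash bucket this partition maps to
    1,                                    // numBuckets, reported by getNumBuckets()
    null,                                 // partitionDimensions: null hashes on all dimensions
    HashPartitionFunction.MURMUR3_32_ABS, // hash function applied to the grouping key
    HadoopDruidIndexerConfig.JSON_MAPPER  // ObjectMapper the spec uses when hashing rows
);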

Example 7 with HashBasedNumberedShardSpec

use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.

the class HashPartitionAdjustingCorePartitionSizeTest method testEqualNumberOfPartitionsToBuckets.

@Test
public void testEqualNumberOfPartitionsToBuckets() throws IOException {
    final File inputDir = temporaryFolder.newFolder();
    for (int i = 0; i < 10; i++) {
        try (final Writer writer = Files.newBufferedWriter(new File(inputDir, "test_" + i).toPath(), StandardCharsets.UTF_8)) {
            writer.write(StringUtils.format("2020-01-01T00:00:00,%s,b1,%d\n", "aa" + (i + 10), 10 * (i + 1)));
        }
    }
    final DimensionBasedPartitionsSpec partitionsSpec = new HashedPartitionsSpec(null, 5, ImmutableList.of("dim1"));
    final Set<DataSegment> segments = runTestTask(TIMESTAMP_SPEC, DIMENSIONS_SPEC, INPUT_FORMAT, null, INTERVAL_TO_INDEX, inputDir, "test_*", partitionsSpec, maxNumConcurrentSubTasks, TaskState.SUCCESS);
    Assert.assertEquals(5, segments.size());
    segments.forEach(segment -> {
        Assert.assertSame(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
        final HashBasedNumberedShardSpec shardSpec = (HashBasedNumberedShardSpec) segment.getShardSpec();
        Assert.assertEquals(5, shardSpec.getNumCorePartitions());
        Assert.assertEquals(5, shardSpec.getNumBuckets());
        Assert.assertEquals(ImmutableList.of("dim1"), shardSpec.getPartitionDimensions());
    });
}
Also used : HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) DimensionBasedPartitionsSpec(org.apache.druid.indexer.partitions.DimensionBasedPartitionsSpec) File(java.io.File) DataSegment(org.apache.druid.timeline.DataSegment) Writer(java.io.Writer) Test(org.junit.Test)
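
The HashedPartitionsSpec above fixes the shard count rather than a row limit, which is why the test expects both the core partition count and the bucket count to be 5. A labeled restatement of the same call, with argument roles inferred from the assertions in this test:

DimensionBasedPartitionsSpec partitionsSpec = new HashedPartitionsSpec(
    null,                    // max rows per segment: unused when the shard count is fixed
    5,                       // numShards: one core partition per hash bucket
    ImmutableList.of("dim1") // partitionDimensions: only dim1 feeds the hash
);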

Example 8 with HashBasedNumberedShardSpec

use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.

the class HashPartitionMultiPhaseParallelIndexingTest method testAppendLinearlyPartitionedSegmensToHashPartitionedDatasourceSuccessfullyAppend.

@Test
public void testAppendLinearlyPartitionedSegmensToHashPartitionedDatasourceSuccessfullyAppend() {
    final Set<DataSegment> publishedSegments = new HashSet<>();
    publishedSegments.addAll(runTestTask(new HashedPartitionsSpec(null, numShards, ImmutableList.of("dim1", "dim2")), TaskState.SUCCESS, false));
    // Append
    publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(5, null), TaskState.SUCCESS, true));
    // And append again
    publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(10, null), TaskState.SUCCESS, true));
    final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
    publishedSegments.forEach(segment -> intervalToSegments.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment));
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final List<DataSegment> segments = entry.getValue();
        final List<DataSegment> hashedSegments = segments.stream().filter(segment -> segment.getShardSpec().getClass() == HashBasedNumberedShardSpec.class).collect(Collectors.toList());
        final List<DataSegment> linearSegments = segments.stream().filter(segment -> segment.getShardSpec().getClass() == NumberedShardSpec.class).collect(Collectors.toList());
        for (DataSegment hashedSegment : hashedSegments) {
            final HashBasedNumberedShardSpec hashShardSpec = (HashBasedNumberedShardSpec) hashedSegment.getShardSpec();
            for (DataSegment linearSegment : linearSegments) {
                Assert.assertEquals(hashedSegment.getInterval(), linearSegment.getInterval());
                Assert.assertEquals(hashedSegment.getVersion(), linearSegment.getVersion());
                final NumberedShardSpec numberedShardSpec = (NumberedShardSpec) linearSegment.getShardSpec();
                Assert.assertEquals(hashShardSpec.getNumCorePartitions(), numberedShardSpec.getNumCorePartitions());
                Assert.assertTrue(hashShardSpec.getPartitionNum() < numberedShardSpec.getPartitionNum());
            }
        }
    }
}
Also used : Arrays(java.util.Arrays) Comparators(org.apache.druid.java.util.common.guava.Comparators) Intervals(org.apache.druid.java.util.common.Intervals) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Parameterized(org.junit.runners.Parameterized) Nullable(javax.annotation.Nullable) HashPartitionFunction(org.apache.druid.timeline.partition.HashPartitionFunction) Before(org.junit.Before) ParseSpec(org.apache.druid.data.input.impl.ParseSpec) DateTimes(org.apache.druid.java.util.common.DateTimes) ScanResultValue(org.apache.druid.query.scan.ScanResultValue) Files(java.nio.file.Files) InputFormat(org.apache.druid.data.input.InputFormat) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) Test(org.junit.Test) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) LockGranularity(org.apache.druid.indexing.common.LockGranularity) File(java.io.File) StandardCharsets(java.nio.charset.StandardCharsets) TaskState(org.apache.druid.indexer.TaskState) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) Writer(java.io.Writer) Entry(java.util.Map.Entry) Assert(org.junit.Assert)
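
The assertions at the end of this test encode the append contract: segments appended with a DynamicPartitionsSpec keep the interval, version, and core partition count of the existing hash-partitioned set, but take partition numbers beyond the core range. A minimal sketch of that relationship with illustrative values (the null ObjectMapper argument mirrors the MaterializedViewSupervisorTest examples below):

// Core set: two hash-partitioned core partitions over two buckets.
HashBasedNumberedShardSpec hashSpec =
    new HashBasedNumberedShardSpec(0, 2, 0, 2, null, HashPartitionFunction.MURMUR3_32_ABS, null);
// Appended segment: same core partition count, partitionNum past the core range.
NumberedShardSpec appendedSpec = new NumberedShardSpec(2, 2);
Assert.assertEquals(hashSpec.getNumCorePartitions(), appendedSpec.getNumCorePartitions());
Assert.assertTrue(hashSpec.getPartitionNum() < appendedSpec.getPartitionNum());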

Example 9 with HashBasedNumberedShardSpec

use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.

the class MaterializedViewSupervisorTest method testCreateTask.

/**
 * Verifies that creating HadoopIndexTask completes without raising an exception.
 */
@Test
public void testCreateTask() {
    List<DataSegment> baseSegments = Collections.singletonList(new DataSegment("base", Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "2015-01-03", ImmutableMap.of(), ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"), new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024));
    HadoopIndexTask task = spec.createTask(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "2015-01-03", baseSegments);
    Assert.assertNotNull(task);
}
Also used : HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) HadoopIndexTask(org.apache.druid.indexing.common.task.HadoopIndexTask) Test(org.junit.Test)
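
The nine positional arguments of the DataSegment constructor are easier to follow with labels. This is a commented restatement of the exact call above, not a different API:

DataSegment segment = new DataSegment(
    "base",                                        // dataSource
    Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), // interval covered by the segment
    "2015-01-03",                                  // version string
    ImmutableMap.of(),                             // loadSpec (empty in these tests)
    ImmutableList.of("dim1", "dim2"),              // dimensions
    ImmutableList.of("m1"),                        // metrics
    new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), // shardSpec
    9,                                             // binary version of the segment format
    1024                                           // size in bytes
);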

Example 10 with HashBasedNumberedShardSpec

use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.

the class MaterializedViewSupervisorTest method testCheckSegments.

@Test
public void testCheckSegments() throws IOException {
    Set<DataSegment> baseSegments = Sets.newHashSet(
        new DataSegment("base", Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), "2015-01-02", ImmutableMap.of(), ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"), new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024),
        new DataSegment("base", Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "2015-01-03", ImmutableMap.of(), ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"), new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024),
        new DataSegment("base", Intervals.of("2015-01-03T00Z/2015-01-04T00Z"), "2015-01-04", ImmutableMap.of(), ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"), new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024)
    );
    Set<DataSegment> derivativeSegments = Sets.newHashSet(
        new DataSegment(derivativeDatasourceName, Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), "2015-01-02", ImmutableMap.of(), ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"), new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024),
        new DataSegment(derivativeDatasourceName, Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "3015-01-01", ImmutableMap.of(), ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"), new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024)
    );
    indexerMetadataStorageCoordinator.announceHistoricalSegments(baseSegments);
    indexerMetadataStorageCoordinator.announceHistoricalSegments(derivativeSegments);
    EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
    EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes();
    EasyMock.expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.of()).anyTimes();
    Pair<SortedMap<Interval, String>, Map<Interval, List<DataSegment>>> toBuildInterval = supervisor.checkSegments();
    Set<Interval> expectedToBuildInterval = Sets.newHashSet(Intervals.of("2015-01-01T00Z/2015-01-02T00Z"));
    Map<Interval, List<DataSegment>> expectedSegments = new HashMap<>();
    expectedSegments.put(Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), Collections.singletonList(new DataSegment("base", Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), "2015-01-02", ImmutableMap.of(), ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"), new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024)));
    expectedSegments.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), Collections.singletonList(new DataSegment("base", Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "2015-01-03", ImmutableMap.of(), ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"), new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024)));
    Assert.assertEquals(expectedToBuildInterval, toBuildInterval.lhs.keySet());
    Assert.assertEquals(expectedSegments, toBuildInterval.rhs);
}
Also used : HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) HashMap(java.util.HashMap) SortedMap(java.util.SortedMap) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Interval(org.joda.time.Interval) Test(org.junit.Test)
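
To read the fixture: versions in these tests are timestamp-like strings, so lexicographic comparison matches chronological order. The derivative segment for 2015-01-02/2015-01-03 carries the far-future version "3015-01-01", which outranks the base version "2015-01-03", so that interval is treated as up to date; the 2015-01-01 interval's derivative does not outrank its base, so only that interval lands in the to-build set. A sketch of the comparison the fixture relies on (the rebuild rule in the comment is an inference from this test, not a quote of the supervisor's source):

// Timestamp-like version strings compare chronologically as plain strings.
String baseVersion = "2015-01-03";
String derivativeVersion = "3015-01-01";
boolean derivativeIsNewer = derivativeVersion.compareTo(baseVersion) > 0; // true
// Inference from this fixture: an interval with an existing derivative is
// rebuilt unless the derivative's version is strictly newer than the base's.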

Aggregations

HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec): 20 uses
Test (org.junit.Test): 15 uses
DataSegment (org.apache.druid.timeline.DataSegment): 12 uses
Interval (org.joda.time.Interval): 12 uses
List (java.util.List): 11 uses
ImmutableList (com.google.common.collect.ImmutableList): 9 uses
Map (java.util.Map): 9 uses
HashedPartitionsSpec (org.apache.druid.indexer.partitions.HashedPartitionsSpec): 8 uses
ArrayList (java.util.ArrayList): 7 uses
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 6 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 6 uses
File (java.io.File): 6 uses
HashMap (java.util.HashMap): 6 uses
HashPartitionFunction (org.apache.druid.timeline.partition.HashPartitionFunction): 6 uses
IOException (java.io.IOException): 5 uses
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 5 uses
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 5 uses
Intervals (org.apache.druid.java.util.common.Intervals): 4 uses
StringUtils (org.apache.druid.java.util.common.StringUtils): 4 uses
DataSchema (org.apache.druid.segment.indexing.DataSchema): 4 uses