
Example 1 with HashBasedNumberedShardSpec

Use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.

From the class IndexerSQLMetadataStorageCoordinatorTest, the method testAllocatePendingSegmentsForHashBasedNumberedShardSpec allocates pending segments for one interval and verifies the partition number, core partition count, and bucket count assigned to each.

@Test
public void testAllocatePendingSegmentsForHashBasedNumberedShardSpec() throws IOException {
    final PartialShardSpec partialShardSpec = new HashBasedNumberedPartialShardSpec(null, 2, 5, null);
    final String dataSource = "ds";
    final Interval interval = Intervals.of("2017-01-01/2017-02-01");
    SegmentIdWithShardSpec id = coordinator.allocatePendingSegment(
        dataSource, "seq", null, interval, partialShardSpec, "version", true
    );
    HashBasedNumberedShardSpec shardSpec = (HashBasedNumberedShardSpec) id.getShardSpec();
    Assert.assertEquals(0, shardSpec.getPartitionNum());
    Assert.assertEquals(0, shardSpec.getNumCorePartitions());
    Assert.assertEquals(5, shardSpec.getNumBuckets());
    coordinator.announceHistoricalSegments(Collections.singleton(new DataSegment(
        id.getDataSource(), id.getInterval(), id.getVersion(), null,
        Collections.emptyList(), Collections.emptyList(), id.getShardSpec(), 0, 10L
    )));
    id = coordinator.allocatePendingSegment(
        dataSource, "seq2", null, interval, partialShardSpec, "version", true
    );
    shardSpec = (HashBasedNumberedShardSpec) id.getShardSpec();
    Assert.assertEquals(1, shardSpec.getPartitionNum());
    Assert.assertEquals(0, shardSpec.getNumCorePartitions());
    Assert.assertEquals(5, shardSpec.getNumBuckets());
    coordinator.announceHistoricalSegments(Collections.singleton(new DataSegment(
        id.getDataSource(), id.getInterval(), id.getVersion(), null,
        Collections.emptyList(), Collections.emptyList(), id.getShardSpec(), 0, 10L
    )));
    id = coordinator.allocatePendingSegment(
        dataSource, "seq3", null, interval,
        new HashBasedNumberedPartialShardSpec(null, 2, 3, null), "version", true
    );
    shardSpec = (HashBasedNumberedShardSpec) id.getShardSpec();
    Assert.assertEquals(2, shardSpec.getPartitionNum());
    Assert.assertEquals(0, shardSpec.getNumCorePartitions());
    Assert.assertEquals(3, shardSpec.getNumBuckets());
}
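
For reference, the seven-argument HashBasedNumberedShardSpec constructor used in Examples 4 and 5 below spells out the fields this test asserts. A minimal sketch of the spec the first allocation should produce, assuming the argument order shown in those examples (partitionNum, numCorePartitions, bucketId, numBuckets, partitionDimensions, partitionFunction, mapper); the bucketId value mirrors the partial spec's arguments and is not itself asserted by the test:

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec;

HashBasedNumberedShardSpec spec = new HashBasedNumberedShardSpec(
    0,                  // partitionNum: first segment allocated in the interval
    0,                  // numCorePartitions: 0 for dynamically appended segments
    2,                  // bucketId, mirroring HashBasedNumberedPartialShardSpec(null, 2, 5, null)
    5,                  // numBuckets, asserted via getNumBuckets() above
    null,               // partitionDimensions: null typically means "hash all dimensions"
    null,               // partitionFunction: null here, matching the partial spec
    new ObjectMapper()  // mapper used when hashing input rows
);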

Example 2 with HashBasedNumberedShardSpec

Use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.

From the class SegmentPublisherHelperTest, the method testAnnotateCorePartitionSetSizeForHashNumberedShardSpec verifies that SegmentPublisherHelper.annotateShardSpec converts each BuildingHashBasedNumberedShardSpec into a HashBasedNumberedShardSpec whose core partition set size is filled in.

@Test
public void testAnnotateCorePartitionSetSizeForHashNumberedShardSpec() {
    final Set<DataSegment> segments = ImmutableSet.of(
        newSegment(new BuildingHashBasedNumberedShardSpec(0, 0, 3, null, HashPartitionFunction.MURMUR3_32_ABS, new ObjectMapper())),
        newSegment(new BuildingHashBasedNumberedShardSpec(1, 1, 3, null, HashPartitionFunction.MURMUR3_32_ABS, new ObjectMapper())),
        newSegment(new BuildingHashBasedNumberedShardSpec(2, 2, 3, null, HashPartitionFunction.MURMUR3_32_ABS, new ObjectMapper()))
    );
    final Set<DataSegment> annotated = SegmentPublisherHelper.annotateShardSpec(segments);
    for (DataSegment segment : annotated) {
        Assert.assertSame(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
        final HashBasedNumberedShardSpec shardSpec = (HashBasedNumberedShardSpec) segment.getShardSpec();
        Assert.assertEquals(3, shardSpec.getNumCorePartitions());
    }
}
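
The building variant carries the same hash metadata but defers the core partition count until publish time, which is what annotateShardSpec supplies. A minimal sketch of that conversion done by hand, assuming BuildingHashBasedNumberedShardSpec exposes BuildingShardSpec's convert(int numCorePartitions) method (check the signature against your Druid version):

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.timeline.partition.BuildingHashBasedNumberedShardSpec;
import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec;
import org.apache.druid.timeline.partition.HashPartitionFunction;

// Same arguments as the first segment above: partitionId=0, bucketId=0,
// numBuckets=3, no partition dimensions, murmur3 hashing.
BuildingHashBasedNumberedShardSpec building = new BuildingHashBasedNumberedShardSpec(
    0, 0, 3, null, HashPartitionFunction.MURMUR3_32_ABS, new ObjectMapper());

// convert(...) fixes the core partition set size, producing the final spec
// that the assertions above expect for every published segment.
HashBasedNumberedShardSpec completed = building.convert(3);
assert completed.getNumCorePartitions() == 3;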

Example 3 with HashBasedNumberedShardSpec

Use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.

From the class IndexGeneratorJobTest, the private helper verifyJob runs an IndexGeneratorJob and validates the published segments and index files, including the concrete shard spec type (numbered, hash-based, or single-dimension) produced for each partitioning strategy.

private void verifyJob(IndexGeneratorJob job) throws IOException {
    Assert.assertTrue(JobHelper.runJobs(ImmutableList.of(job)));
    final List<DataSegmentAndIndexZipFilePath> dataSegmentAndIndexZipFilePaths =
        IndexGeneratorJob.getPublishedSegmentAndIndexZipFilePaths(config);
    final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
    dataSegmentAndIndexZipFilePaths.forEach(
        segmentAndIndexZipFilePath -> intervalToSegments
            .computeIfAbsent(segmentAndIndexZipFilePath.getSegment().getInterval(), k -> new ArrayList<>())
            .add(segmentAndIndexZipFilePath.getSegment())
    );
    JobHelper.renameIndexFilesForSegments(config.getSchema(), dataSegmentAndIndexZipFilePaths);
    JobHelper.maybeDeleteIntermediatePath(true, config.getSchema());
    File workingPath = new File(config.makeIntermediatePath().toUri().getPath());
    Assert.assertTrue(workingPath.exists());
    final Map<Interval, List<File>> intervalToIndexFiles = new HashMap<>();
    int segmentNum = 0;
    for (DateTime currTime = interval.getStart(); currTime.isBefore(interval.getEnd()); currTime = currTime.plusDays(1)) {
        Object[][] shardInfo = shardInfoForEachSegment[segmentNum++];
        File segmentOutputFolder = new File(StringUtils.format(
            "%s/%s/%s_%s/%s",
            config.getSchema().getIOConfig().getSegmentOutputPath(),
            config.getSchema().getDataSchema().getDataSource(),
            currTime.toString(),
            currTime.plusDays(1).toString(),
            config.getSchema().getTuningConfig().getVersion()
        ));
        Assert.assertTrue(segmentOutputFolder.exists());
        Assert.assertEquals(shardInfo.length, segmentOutputFolder.list().length);
        for (int partitionNum = 0; partitionNum < shardInfo.length; ++partitionNum) {
            File individualSegmentFolder = new File(segmentOutputFolder, Integer.toString(partitionNum));
            Assert.assertTrue(individualSegmentFolder.exists());
            File indexZip = new File(individualSegmentFolder, "index.zip");
            Assert.assertTrue(indexZip.exists());
            intervalToIndexFiles.computeIfAbsent(new Interval(currTime, currTime.plusDays(1)), k -> new ArrayList<>()).add(indexZip);
        }
    }
    Assert.assertEquals(intervalToSegments.size(), intervalToIndexFiles.size());
    segmentNum = 0;
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final Interval interval = entry.getKey();
        final List<DataSegment> segments = entry.getValue();
        final List<File> indexFiles = intervalToIndexFiles.get(interval);
        // Assert non-null before sorting; a missing entry would otherwise surface as an NPE.
        Assert.assertNotNull(indexFiles);
        Collections.sort(segments);
        indexFiles.sort(Comparator.comparing(File::getAbsolutePath));
        Assert.assertEquals(segments.size(), indexFiles.size());
        Object[][] shardInfo = shardInfoForEachSegment[segmentNum++];
        for (int i = 0; i < segments.size(); i++) {
            final DataSegment dataSegment = segments.get(i);
            final File indexZip = indexFiles.get(i);
            Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
            Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
            Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
            Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
            if ("website".equals(datasourceName)) {
                Assert.assertEquals("website", dataSegment.getDataSource());
                Assert.assertEquals("host", dataSegment.getDimensions().get(0));
                Assert.assertEquals("visited_num", dataSegment.getMetrics().get(0));
                Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
            } else if ("inherit_dims".equals(datasourceName)) {
                Assert.assertEquals("inherit_dims", dataSegment.getDataSource());
                Assert.assertEquals(ImmutableList.of("X", "Y", "M", "Q", "B", "F"), dataSegment.getDimensions());
                Assert.assertEquals("count", dataSegment.getMetrics().get(0));
            } else if ("inherit_dims2".equals(datasourceName)) {
                Assert.assertEquals("inherit_dims2", dataSegment.getDataSource());
                Assert.assertEquals(ImmutableList.of("B", "F", "M", "Q", "X", "Y"), dataSegment.getDimensions());
                Assert.assertEquals("count", dataSegment.getMetrics().get(0));
            } else {
                Assert.fail("Test did not specify supported datasource name");
            }
            if (forceExtendableShardSpecs) {
                NumberedShardSpec spec = (NumberedShardSpec) dataSegment.getShardSpec();
                Assert.assertEquals(i, spec.getPartitionNum());
                Assert.assertEquals(shardInfo.length, spec.getNumCorePartitions());
            } else if ("hashed".equals(partitionType)) {
                Integer[] hashShardInfo = (Integer[]) shardInfo[i];
                HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
                Assert.assertEquals((int) hashShardInfo[0], spec.getPartitionNum());
                Assert.assertEquals((int) hashShardInfo[1], spec.getNumCorePartitions());
            } else if ("single".equals(partitionType)) {
                String[] singleDimensionShardInfo = (String[]) shardInfo[i];
                SingleDimensionShardSpec spec = (SingleDimensionShardSpec) dataSegment.getShardSpec();
                Assert.assertEquals(singleDimensionShardInfo[0], spec.getStart());
                Assert.assertEquals(singleDimensionShardInfo[1], spec.getEnd());
            } else {
                throw new RE("Invalid partition type:[%s]", partitionType);
            }
        }
    }
}
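
In the "hashed" branch above, each shardInfo row is consumed as {partitionNum, numCorePartitions}. A hypothetical fixture (not the actual parameterized test data) illustrating the shape that branch expects for a day split into four hash partitions:

// Hypothetical: each entry is read above as {partitionNum, numCorePartitions}.
Object[][] shardInfo = new Object[][]{
    new Integer[]{0, 4},
    new Integer[]{1, 4},
    new Integer[]{2, 4},
    new Integer[]{3, 4}
};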

Example 4 with HashBasedNumberedShardSpec

Use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.

From the class HadoopDruidDetermineConfigurationJobTest, the method testRunWithSingleDimensionPartitionsSpecCreateHashBasedNumberedShardSpecWithoutHashPartitionFunction verifies that, when partitions are not being determined, a SingleDimensionPartitionsSpec yields one HashBasedNumberedShardSpec per interval whose hash partition function is null.

@Test
public void testRunWithSingleDimensionPartitionsSpecCreateHashBasedNumberedShardSpecWithoutHashPartitionFunction() {
    final Set<Interval> intervals = ImmutableSet.of(Intervals.of("2020-01-01/P1D"), Intervals.of("2020-01-02/P1D"), Intervals.of("2020-01-03/P1D"));
    final SingleDimensionPartitionsSpec partitionsSpec = new SingleDimensionPartitionsSpec(1000, null, "dim", false);
    final HadoopDruidIndexerConfig config = Mockito.mock(HadoopDruidIndexerConfig.class);
    Mockito.when(config.isDeterminingPartitions()).thenReturn(false);
    Mockito.when(config.getPartitionsSpec()).thenReturn(partitionsSpec);
    Mockito.when(config.getSegmentGranularIntervals()).thenReturn(intervals);
    final ArgumentCaptor<Map<Long, List<HadoopyShardSpec>>> resultCaptor = ArgumentCaptor.forClass(Map.class);
    Mockito.doNothing().when(config).setShardSpecs(resultCaptor.capture());
    final HadoopDruidDetermineConfigurationJob job = new HadoopDruidDetermineConfigurationJob(config);
    Assert.assertTrue(job.run());
    final Map<Long, List<HadoopyShardSpec>> shardSpecs = resultCaptor.getValue();
    Assert.assertEquals(3, shardSpecs.size());
    for (Interval interval : intervals) {
        final List<HadoopyShardSpec> shardSpecsPerInterval = shardSpecs.get(interval.getStartMillis());
        Assert.assertEquals(1, shardSpecsPerInterval.size());
        Assert.assertEquals(
            new HashBasedNumberedShardSpec(0, shardSpecsPerInterval.size(), 0, shardSpecsPerInterval.size(), ImmutableList.of("dim"), null, new ObjectMapper()),
            shardSpecsPerInterval.get(0).getActualSpec()
        );
    }
}
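
Since the test asserts shardSpecsPerInterval.size() == 1, the expected value in the final assertion collapses to a fully spelled-out spec. An equivalent expansion, for readability only:

// partitionNum=0, numCorePartitions=1, bucketId=0, numBuckets=1,
// partitionDimensions=["dim"], partitionFunction=null.
HashBasedNumberedShardSpec expected = new HashBasedNumberedShardSpec(
    0, 1, 0, 1, ImmutableList.of("dim"), null, new ObjectMapper());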

Example 5 with HashBasedNumberedShardSpec

Use of org.apache.druid.timeline.partition.HashBasedNumberedShardSpec in project druid by druid-io.

From the class HadoopDruidDetermineConfigurationJobTest, the method testRunWithHashedPartitionsSpecCreateHashBasedNumberedShardSpecWithHashPartitionFunction is the counterpart to Example 4: a HashedPartitionsSpec configured with MURMUR3_32_ABS yields two shard specs per interval, each carrying that hash partition function explicitly.

@Test
public void testRunWithHashedPartitionsSpecCreateHashBasedNumberedShardSpecWithHashPartitionFunction() {
    final Set<Interval> intervals = ImmutableSet.of(Intervals.of("2020-01-01/P1D"), Intervals.of("2020-01-02/P1D"), Intervals.of("2020-01-03/P1D"));
    final HashedPartitionsSpec partitionsSpec = new HashedPartitionsSpec(null, 2, null, HashPartitionFunction.MURMUR3_32_ABS, null, null);
    final HadoopDruidIndexerConfig config = Mockito.mock(HadoopDruidIndexerConfig.class);
    Mockito.when(config.isDeterminingPartitions()).thenReturn(false);
    Mockito.when(config.getPartitionsSpec()).thenReturn(partitionsSpec);
    Mockito.when(config.getSegmentGranularIntervals()).thenReturn(intervals);
    final ArgumentCaptor<Map<Long, List<HadoopyShardSpec>>> resultCaptor = ArgumentCaptor.forClass(Map.class);
    Mockito.doNothing().when(config).setShardSpecs(resultCaptor.capture());
    final HadoopDruidDetermineConfigurationJob job = new HadoopDruidDetermineConfigurationJob(config);
    Assert.assertTrue(job.run());
    final Map<Long, List<HadoopyShardSpec>> shardSpecs = resultCaptor.getValue();
    Assert.assertEquals(3, shardSpecs.size());
    for (Interval interval : intervals) {
        final List<HadoopyShardSpec> shardSpecsPerInterval = shardSpecs.get(interval.getStartMillis());
        Assert.assertEquals(2, shardSpecsPerInterval.size());
        for (int i = 0; i < shardSpecsPerInterval.size(); i++) {
            Assert.assertEquals(
                new HashBasedNumberedShardSpec(i, shardSpecsPerInterval.size(), i, shardSpecsPerInterval.size(), null, HashPartitionFunction.MURMUR3_32_ABS, new ObjectMapper()),
                shardSpecsPerInterval.get(i).getActualSpec()
            );
        }
    }
}
