Example 1 with SingleDimensionShardSpec

Use of org.apache.druid.timeline.partition.SingleDimensionShardSpec in project druid by druid-io.

From the class IndexerSQLMetadataStorageCoordinatorTest, method testAddNumberedShardSpecAfterSingleDimensionsShardSpecWithUnknownCorePartitionSize:

@Test
public void testAddNumberedShardSpecAfterSingleDimensionsShardSpecWithUnknownCorePartitionSize() throws IOException {
    final String datasource = "datasource";
    final Interval interval = Intervals.of("2020-01-01/P1D");
    final String version = "version";
    final List<String> dimensions = ImmutableList.of("dim");
    final List<String> metrics = ImmutableList.of("met");
    final Set<DataSegment> originalSegments = new HashSet<>();
    for (int i = 0; i < 6; i++) {
        final String start = i == 0 ? null : String.valueOf(i - 1);
        final String end = i == 5 ? null : String.valueOf(i);
        originalSegments.add(new DataSegment(
            datasource, interval, version, ImmutableMap.of(), dimensions, metrics,
            // emulate shardSpecs created in older versions of Druid: a null
            // numCorePartitions means the core partition set size is unknown
            new SingleDimensionShardSpec("dim", start, end, i, null),
            9, 10L));
    }
    coordinator.announceHistoricalSegments(originalSegments);
    final SegmentIdWithShardSpec id = coordinator.allocatePendingSegment(datasource, "seq", null, interval, NumberedPartialShardSpec.instance(), version, false);
    Assert.assertNull(id);
}
Also used : DataSegment(org.apache.druid.timeline.DataSegment) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) Interval(org.joda.time.Interval) HashSet(java.util.HashSet) Test(org.junit.Test)
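
Why the coordinator returns null here: every existing shard records a null (unknown) core partition set size, so the coordinator cannot determine whether a new NumberedShardSpec would extend a complete core partition set, and it refuses the allocation. For context, here is a minimal standalone sketch of the shard layout the loop above builds; the class name and printout are illustrative, not part of the test.

// Sketch: six contiguous range shards on "dim". A null start or end is an
// open bound, and a null numCorePartitions marks the core partition set
// size as unknown (as in segments written by older versions of Druid).
import org.apache.druid.timeline.partition.SingleDimensionShardSpec;

public class ShardLayoutSketch {
    public static void main(String[] args) {
        for (int i = 0; i < 6; i++) {
            final String start = i == 0 ? null : String.valueOf(i - 1);
            final String end = i == 5 ? null : String.valueOf(i);
            final SingleDimensionShardSpec spec = new SingleDimensionShardSpec("dim", start, end, i, null);
            System.out.printf("partition %d covers [%s, %s)%n",
                    spec.getPartitionNum(),
                    spec.getStart() == null ? "-inf" : spec.getStart(),
                    spec.getEnd() == null ? "+inf" : spec.getEnd());
        }
    }
}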

Example 2 with SingleDimensionShardSpec

Use of org.apache.druid.timeline.partition.SingleDimensionShardSpec in project druid by druid-io.

From the class CachingClusteredClientTest, method populateTimeline:

private List<Map<DruidServer, ServerExpectations>> populateTimeline(List<Interval> queryIntervals, List<List<Iterable<Result<Object>>>> expectedResults, int numQueryIntervals, List<Object> mocks) {
    timeline = new VersionedIntervalTimeline<>(Ordering.natural());
    final List<Map<DruidServer, ServerExpectations>> serverExpectationList = new ArrayList<>();
    for (int k = 0; k < numQueryIntervals + 1; ++k) {
        final int numChunks = expectedResults.get(k).size();
        final TreeMap<DruidServer, ServerExpectations> serverExpectations = new TreeMap<>();
        serverExpectationList.add(serverExpectations);
        for (int j = 0; j < numChunks; ++j) {
            DruidServer lastServer = servers[random.nextInt(servers.length)];
            serverExpectations.computeIfAbsent(lastServer, server -> new ServerExpectations(server, makeMock(mocks, QueryRunner.class)));
            final ShardSpec shardSpec;
            if (numChunks == 1) {
                shardSpec = new SingleDimensionShardSpec("dimAll", null, null, 0, 1);
            } else {
                String start = null;
                String end = null;
                if (j > 0) {
                    start = String.valueOf(j);
                }
                if (j + 1 < numChunks) {
                    end = String.valueOf(j + 1);
                }
                shardSpec = new SingleDimensionShardSpec("dim" + k, start, end, j, numChunks);
            }
            DataSegment mockSegment = makeMock(mocks, DataSegment.class);
            ServerExpectation<Object> expectation = new ServerExpectation<>(
                // segment id encodes the interval index (k) and chunk index (j)
                SegmentId.dummy(StringUtils.format("%s_%s", k, j)),
                queryIntervals.get(k),
                mockSegment,
                shardSpec,
                expectedResults.get(k).get(j));
            serverExpectations.get(lastServer).addExpectation(expectation);
            EasyMock.expect(mockSegment.getSize()).andReturn(0L).anyTimes();
            EasyMock.replay(mockSegment);
            ServerSelector selector = new ServerSelector(expectation.getSegment(), new HighestPriorityTierSelectorStrategy(new RandomServerSelectorStrategy()));
            selector.addServerAndUpdateSegment(new QueryableDruidServer(lastServer, null), selector.getSegment());
            EasyMock.reset(mockSegment);
            EasyMock.expect(mockSegment.getShardSpec()).andReturn(shardSpec).anyTimes();
            timeline.add(queryIntervals.get(k), String.valueOf(k), shardSpec.createChunk(selector));
        }
    }
    return serverExpectationList;
}
Also used : ArrayList(java.util.ArrayList) QueryableDruidServer(org.apache.druid.client.selector.QueryableDruidServer) TreeMap(java.util.TreeMap) DataSegment(org.apache.druid.timeline.DataSegment) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NoneShardSpec(org.apache.druid.timeline.partition.NoneShardSpec) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) ServerSelector(org.apache.druid.client.selector.ServerSelector) HighestPriorityTierSelectorStrategy(org.apache.druid.client.selector.HighestPriorityTierSelectorStrategy) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) RandomServerSelectorStrategy(org.apache.druid.client.selector.RandomServerSelectorStrategy)
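
The start/end bookkeeping in the else branch is easy to misread: chunk j covers [j, j + 1), with the first chunk's start and the last chunk's end left null (open bounds), so the chunks tile the entire dimension space. A standalone sketch of just that boundary logic (illustrative class, not part of the test):

// Sketch: chunk boundaries as assigned in populateTimeline, for numChunks = 3.
public class ChunkBoundsSketch {
    public static void main(String[] args) {
        final int numChunks = 3;
        for (int j = 0; j < numChunks; j++) {
            final String start = j > 0 ? String.valueOf(j) : null;               // chunk 0: open start
            final String end = j + 1 < numChunks ? String.valueOf(j + 1) : null; // last chunk: open end
            System.out.printf("chunk %d: [%s, %s)%n", j,
                    start == null ? "-inf" : start,
                    end == null ? "+inf" : end);
        }
    }
}

This prints chunk 0: [-inf, 1), chunk 1: [1, 2), chunk 2: [2, +inf).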

Example 3 with SingleDimensionShardSpec

Use of org.apache.druid.timeline.partition.SingleDimensionShardSpec in project druid by druid-io.

From the class SegmentPublisherHelperTest, method testAnnotateCorePartitionSetSizeForSingleDimensionShardSpec:

@Test
public void testAnnotateCorePartitionSetSizeForSingleDimensionShardSpec() {
    final Set<DataSegment> segments = ImmutableSet.of(
        newSegment(new BuildingSingleDimensionShardSpec(0, "dim", null, "ccc", 0)),
        newSegment(new BuildingSingleDimensionShardSpec(1, "dim", null, "ccc", 1)),
        newSegment(new BuildingSingleDimensionShardSpec(2, "dim", null, "ccc", 2)));
    final Set<DataSegment> annotated = SegmentPublisherHelper.annotateShardSpec(segments);
    for (DataSegment segment : annotated) {
        Assert.assertSame(SingleDimensionShardSpec.class, segment.getShardSpec().getClass());
        final SingleDimensionShardSpec shardSpec = (SingleDimensionShardSpec) segment.getShardSpec();
        Assert.assertEquals(3, shardSpec.getNumCorePartitions());
    }
}
Also used : BuildingSingleDimensionShardSpec(org.apache.druid.timeline.partition.BuildingSingleDimensionShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) Test(org.junit.Test)
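
In essence, annotateShardSpec counts the segments that share an interval and converts each BuildingSingleDimensionShardSpec into a complete SingleDimensionShardSpec carrying that count as the core partition set size. Below is a hedged sketch of the conversion step; it assumes a convert(numCorePartitions) method on the building spec, so verify the signature against your Druid version.

// Sketch: finalizing a building spec once the core partition count is known.
// BuildingSingleDimensionShardSpec#convert(int) is an assumption here.
import org.apache.druid.timeline.partition.BuildingSingleDimensionShardSpec;
import org.apache.druid.timeline.partition.SingleDimensionShardSpec;

public class AnnotateSketch {
    public static void main(String[] args) {
        final BuildingSingleDimensionShardSpec building =
                new BuildingSingleDimensionShardSpec(0, "dim", null, "ccc", 0);
        // Three segments were published for the interval, so numCorePartitions = 3.
        final SingleDimensionShardSpec complete = building.convert(3);
        System.out.println(complete.getNumCorePartitions()); // 3
    }
}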

Example 4 with SingleDimensionShardSpec

Use of org.apache.druid.timeline.partition.SingleDimensionShardSpec in project druid by druid-io.

From the class IndexGeneratorJobTest, method verifyJob:

private void verifyJob(IndexGeneratorJob job) throws IOException {
    Assert.assertTrue(JobHelper.runJobs(ImmutableList.of(job)));
    final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
    final List<DataSegmentAndIndexZipFilePath> dataSegmentAndIndexZipFilePaths =
        IndexGeneratorJob.getPublishedSegmentAndIndexZipFilePaths(config);
    // group the published segments by interval (fetch the list once rather than twice)
    dataSegmentAndIndexZipFilePaths.forEach(
        segmentAndIndexZipFilePath -> intervalToSegments
            .computeIfAbsent(segmentAndIndexZipFilePath.getSegment().getInterval(), k -> new ArrayList<>())
            .add(segmentAndIndexZipFilePath.getSegment()));
    JobHelper.renameIndexFilesForSegments(config.getSchema(), dataSegmentAndIndexZipFilePaths);
    JobHelper.maybeDeleteIntermediatePath(true, config.getSchema());
    File workingPath = new File(config.makeIntermediatePath().toUri().getPath());
    Assert.assertTrue(workingPath.exists());
    final Map<Interval, List<File>> intervalToIndexFiles = new HashMap<>();
    int segmentNum = 0;
    for (DateTime currTime = interval.getStart(); currTime.isBefore(interval.getEnd()); currTime = currTime.plusDays(1)) {
        Object[][] shardInfo = shardInfoForEachSegment[segmentNum++];
        File segmentOutputFolder = new File(StringUtils.format("%s/%s/%s_%s/%s", config.getSchema().getIOConfig().getSegmentOutputPath(), config.getSchema().getDataSchema().getDataSource(), currTime.toString(), currTime.plusDays(1).toString(), config.getSchema().getTuningConfig().getVersion()));
        Assert.assertTrue(segmentOutputFolder.exists());
        Assert.assertEquals(shardInfo.length, segmentOutputFolder.list().length);
        for (int partitionNum = 0; partitionNum < shardInfo.length; ++partitionNum) {
            File individualSegmentFolder = new File(segmentOutputFolder, Integer.toString(partitionNum));
            Assert.assertTrue(individualSegmentFolder.exists());
            File indexZip = new File(individualSegmentFolder, "index.zip");
            Assert.assertTrue(indexZip.exists());
            intervalToIndexFiles.computeIfAbsent(new Interval(currTime, currTime.plusDays(1)), k -> new ArrayList<>()).add(indexZip);
        }
    }
    Assert.assertEquals(intervalToSegments.size(), intervalToIndexFiles.size());
    segmentNum = 0;
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final Interval interval = entry.getKey();
        final List<DataSegment> segments = entry.getValue();
        final List<File> indexFiles = intervalToIndexFiles.get(interval);
        Assert.assertNotNull(indexFiles);
        Collections.sort(segments);
        indexFiles.sort(Comparator.comparing(File::getAbsolutePath));
        Assert.assertEquals(segments.size(), indexFiles.size());
        Object[][] shardInfo = shardInfoForEachSegment[segmentNum++];
        for (int i = 0; i < segments.size(); i++) {
            final DataSegment dataSegment = segments.get(i);
            final File indexZip = indexFiles.get(i);
            Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
            Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
            Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
            Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
            if ("website".equals(datasourceName)) {
                Assert.assertEquals("website", dataSegment.getDataSource());
                Assert.assertEquals("host", dataSegment.getDimensions().get(0));
                Assert.assertEquals("visited_num", dataSegment.getMetrics().get(0));
                Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
            } else if ("inherit_dims".equals(datasourceName)) {
                Assert.assertEquals("inherit_dims", dataSegment.getDataSource());
                Assert.assertEquals(ImmutableList.of("X", "Y", "M", "Q", "B", "F"), dataSegment.getDimensions());
                Assert.assertEquals("count", dataSegment.getMetrics().get(0));
            } else if ("inherit_dims2".equals(datasourceName)) {
                Assert.assertEquals("inherit_dims2", dataSegment.getDataSource());
                Assert.assertEquals(ImmutableList.of("B", "F", "M", "Q", "X", "Y"), dataSegment.getDimensions());
                Assert.assertEquals("count", dataSegment.getMetrics().get(0));
            } else {
                Assert.fail("Test did not specify supported datasource name");
            }
            if (forceExtendableShardSpecs) {
                NumberedShardSpec spec = (NumberedShardSpec) dataSegment.getShardSpec();
                Assert.assertEquals(i, spec.getPartitionNum());
                Assert.assertEquals(shardInfo.length, spec.getNumCorePartitions());
            } else if ("hashed".equals(partitionType)) {
                Integer[] hashShardInfo = (Integer[]) shardInfo[i];
                HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
                Assert.assertEquals((int) hashShardInfo[0], spec.getPartitionNum());
                Assert.assertEquals((int) hashShardInfo[1], spec.getNumCorePartitions());
            } else if ("single".equals(partitionType)) {
                String[] singleDimensionShardInfo = (String[]) shardInfo[i];
                SingleDimensionShardSpec spec = (SingleDimensionShardSpec) dataSegment.getShardSpec();
                Assert.assertEquals(singleDimensionShardInfo[0], spec.getStart());
                Assert.assertEquals(singleDimensionShardInfo[1], spec.getEnd());
            } else {
                throw new RE("Invalid partition type:[%s]", partitionType);
            }
        }
    }
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) SequenceFile(org.apache.hadoop.io.SequenceFile) ByteBuffer(java.nio.ByteBuffer) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MRJobConfig(org.apache.hadoop.mapreduce.MRJobConfig) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) NamedType(com.fasterxml.jackson.databind.jsontype.NamedType) Path(org.apache.hadoop.fs.Path) Parameterized(org.junit.runners.Parameterized) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) ImmutableMap(com.google.common.collect.ImmutableMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) StringUtils(org.apache.druid.java.util.common.StringUtils) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) DateTimeComparator(org.joda.time.DateTimeComparator) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) Intervals(org.apache.druid.java.util.common.Intervals) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) BytesWritable(org.apache.hadoop.io.BytesWritable) SequenceFileInputFormat(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) HashPartitionFunction(org.apache.druid.timeline.partition.HashPartitionFunction) Before(org.junit.Before) RE(org.apache.druid.java.util.common.RE) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) FileUtils(org.apache.commons.io.FileUtils) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) File(java.io.File) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Rule(org.junit.Rule) TreeMap(java.util.TreeMap) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) Writer(org.apache.hadoop.io.SequenceFile.Writer) Assert(org.junit.Assert) Comparator(java.util.Comparator) DataSchema(org.apache.druid.segment.indexing.DataSchema) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder)
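
The branch at the end of the loop is the part relevant to this page: verifyJob expects a different ShardSpec subclass depending on how the job was configured. A small helper restating that mapping (the helper itself is illustrative, not part of the test):

// Sketch: which ShardSpec class verifyJob asserts for each configuration.
import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec;
import org.apache.druid.timeline.partition.NumberedShardSpec;
import org.apache.druid.timeline.partition.ShardSpec;
import org.apache.druid.timeline.partition.SingleDimensionShardSpec;

public class ExpectedSpecClassSketch {
    static Class<? extends ShardSpec> expectedSpecClass(boolean forceExtendableShardSpecs, String partitionType) {
        if (forceExtendableShardSpecs) {
            // extendable shard specs win regardless of the partition type
            return NumberedShardSpec.class;
        } else if ("hashed".equals(partitionType)) {
            return HashBasedNumberedShardSpec.class;
        } else if ("single".equals(partitionType)) {
            return SingleDimensionShardSpec.class;
        }
        throw new IllegalArgumentException("Invalid partition type: " + partitionType);
    }

    public static void main(String[] args) {
        System.out.println(expectedSpecClass(false, "single")); // SingleDimensionShardSpec
    }
}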

Example 5 with SingleDimensionShardSpec

Use of org.apache.druid.timeline.partition.SingleDimensionShardSpec in project druid by druid-io.

From the class DeterminePartitionsJobTest, method testPartitionJob:

@Test
public void testPartitionJob() {
    DeterminePartitionsJob job = new DeterminePartitionsJob(config);
    // the job should report success before we inspect the resulting shard specs
    Assert.assertTrue(job.run());
    int shardNum = 0;
    int segmentNum = 0;
    Assert.assertEquals(expectedNumOfSegments, config.getSchema().getTuningConfig().getShardSpecs().size());
    for (Map.Entry<Long, List<HadoopyShardSpec>> entry : config.getSchema().getTuningConfig().getShardSpecs().entrySet()) {
        int partitionNum = 0;
        List<HadoopyShardSpec> specs = entry.getValue();
        Assert.assertEquals(expectedNumOfShardsForEachSegment[segmentNum], specs.size());
        for (HadoopyShardSpec spec : specs) {
            SingleDimensionShardSpec actualSpec = (SingleDimensionShardSpec) spec.getActualSpec();
            Assert.assertEquals(shardNum, spec.getShardNum());
            Assert.assertEquals(expectedStartEndForEachShard[segmentNum][partitionNum][0], actualSpec.getStart());
            Assert.assertEquals(expectedStartEndForEachShard[segmentNum][partitionNum][1], actualSpec.getEnd());
            Assert.assertEquals(partitionNum, actualSpec.getPartitionNum());
            shardNum++;
            partitionNum++;
        }
        segmentNum++;
    }
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) Test(org.junit.Test)
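
The distinction the assertions draw: spec.getShardNum() is a global counter that keeps increasing across segments, while actualSpec.getPartitionNum() restarts at 0 within each segment. A hedged sketch of that wrapping follows; the HadoopyShardSpec package and constructor signature are assumptions inferred from the getters the test calls, so check them against your Druid version.

// Sketch: a HadoopyShardSpec pairs the actual spec (per-segment partitionNum)
// with a global shardNum. Constructor signature assumed, not verified.
import org.apache.druid.indexer.HadoopyShardSpec;
import org.apache.druid.timeline.partition.SingleDimensionShardSpec;

public class HadoopyShardSpecSketch {
    public static void main(String[] args) {
        // First partition of some later segment, but the fourth shard overall.
        final SingleDimensionShardSpec actual = new SingleDimensionShardSpec("host", null, "m", 0, 1);
        final HadoopyShardSpec wrapped = new HadoopyShardSpec(actual, 3);
        System.out.println(wrapped.getShardNum());                     // 3 (global)
        System.out.println(wrapped.getActualSpec().getPartitionNum()); // 0 (per segment)
    }
}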

Aggregations

SingleDimensionShardSpec (org.apache.druid.timeline.partition.SingleDimensionShardSpec) 9
DataSegment (org.apache.druid.timeline.DataSegment) 8
Test (org.junit.Test) 7
File (java.io.File) 4
ArrayList (java.util.ArrayList) 4
Map (java.util.Map) 4
ImmutableList (com.google.common.collect.ImmutableList) 3
ImmutableMap (com.google.common.collect.ImmutableMap) 3
Writer (java.io.Writer) 3
HashMap (java.util.HashMap) 3
List (java.util.List) 3
DimensionRangePartitionsSpec (org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec) 3
Interval (org.joda.time.Interval) 3
IOException (java.io.IOException) 2
Collection (java.util.Collection) 2
Collections (java.util.Collections) 2
HashSet (java.util.HashSet) 2
Entry (java.util.Map.Entry) 2
TreeMap (java.util.TreeMap) 2
CSVParseSpec (org.apache.druid.data.input.impl.CSVParseSpec) 2