Example 11 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From class ClosedSegmentsSinksBatchAppenderatorDriverTest, method testSimple:

@Test
public void testSimple() throws Exception {
    Assert.assertNull(driver.startJob(null));
    for (InputRow row : ROWS) {
        Assert.assertTrue(driver.add(row, "dummy").isOk());
    }
    checkSegmentStates(2, SegmentState.APPENDING);
    driver.pushAllAndClear(TIMEOUT);
    checkSegmentStates(2, SegmentState.PUSHED_AND_DROPPED);
    final SegmentsAndCommitMetadata published = driver
        .publishAll(null, null, makeOkPublisher(), Function.identity())
        .get(TIMEOUT, TimeUnit.MILLISECONDS);
    Assert.assertEquals(
        ImmutableSet.of(
            new SegmentIdWithShardSpec(DATA_SOURCE, Intervals.of("2000/PT1H"), VERSION, new NumberedShardSpec(0, 0)),
            new SegmentIdWithShardSpec(DATA_SOURCE, Intervals.of("2000T01/PT1H"), VERSION, new NumberedShardSpec(0, 0))
        ),
        published.getSegments().stream().map(SegmentIdWithShardSpec::fromDataSegment).collect(Collectors.toSet())
    );
    Assert.assertNull(published.getCommitMetadata());
}
Also used : MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Test(org.junit.Test)
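
For reference, the shard spec inside each expected SegmentIdWithShardSpec can be round-tripped through JSON on its own. A minimal sketch, assuming a Jackson mapper with Druid's serde set up (the "numbered" type name and the partitionNum/partitions field names are assumptions about NumberedShardSpec's annotations, not taken from this test):

    ObjectMapper mapper = new DefaultObjectMapper(); // org.apache.druid.jackson.DefaultObjectMapper
    NumberedShardSpec spec = new NumberedShardSpec(0, 0);
    String json = mapper.writeValueAsString(spec);
    // expected shape (assumption): {"type":"numbered","partitionNum":0,"partitions":0}
    ShardSpec back = mapper.readValue(json, ShardSpec.class);
    Assert.assertEquals(spec.getPartitionNum(), back.getPartitionNum());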

Example 12 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From class DruidSchemaTest, method setUp:

@Before
public void setUp() throws Exception {
    final File tmpDir = temporaryFolder.newFolder();
    final QueryableIndex index1 = IndexBuilder.create()
        .tmpDir(new File(tmpDir, "1"))
        .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
        .schema(new IncrementalIndexSchema.Builder()
            .withMetrics(new CountAggregatorFactory("cnt"), new DoubleSumAggregatorFactory("m1", "m1"), new HyperUniquesAggregatorFactory("unique_dim1", "dim1"))
            .withRollup(false)
            .build())
        .rows(ROWS1)
        .buildMMappedIndex();
    final QueryableIndex index2 = IndexBuilder.create()
        .tmpDir(new File(tmpDir, "2"))
        .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
        .schema(new IncrementalIndexSchema.Builder()
            .withMetrics(new LongSumAggregatorFactory("m1", "m1"))
            .withRollup(false)
            .build())
        .rows(ROWS2)
        .buildMMappedIndex();
    walker = new SpecificSegmentsQuerySegmentWalker(conglomerate)
        .add(DataSegment.builder().dataSource(CalciteTests.DATASOURCE1).interval(Intervals.of("2000/P1Y")).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), index1)
        .add(DataSegment.builder().dataSource(CalciteTests.DATASOURCE1).interval(Intervals.of("2001/P1Y")).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), index2)
        .add(DataSegment.builder().dataSource(CalciteTests.DATASOURCE2).interval(index2.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), index2);
    final DataSegment segment1 = new DataSegment(
        "foo3",
        Intervals.of("2012/2013"),
        "version3",
        null,
        ImmutableList.of("dim1", "dim2"),
        ImmutableList.of("met1", "met2"),
        new NumberedShardSpec(2, 3),
        null,
        1,
        100L,
        PruneSpecsHolder.DEFAULT
    );
    final List<DataSegment> realtimeSegments = ImmutableList.of(segment1);
    serverView = new TestServerInventoryView(walker.getSegments(), realtimeSegments);
    druidServers = serverView.getDruidServers();
    schema = new DruidSchema(
        CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate),
        serverView,
        segmentManager,
        new MapJoinableFactory(ImmutableSet.of(globalTableJoinable), ImmutableMap.of(globalTableJoinable.getClass(), GlobalTableDataSource.class)),
        PLANNER_CONFIG_DEFAULT,
        new NoopEscalator(),
        new BrokerInternalQueryConfig(),
        null
    ) {

        @Override
        protected DruidTable buildDruidTable(String dataSource) {
            DruidTable table = super.buildDruidTable(dataSource);
            buildTableLatch.countDown();
            return table;
        }

        @Override
        void markDataSourceAsNeedRebuild(String datasource) {
            super.markDataSourceAsNeedRebuild(datasource);
            markDataSourceLatch.countDown();
        }
    };
    schema2 = new DruidSchema(
        CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate),
        serverView,
        segmentManager,
        new MapJoinableFactory(ImmutableSet.of(globalTableJoinable), ImmutableMap.of(globalTableJoinable.getClass(), GlobalTableDataSource.class)),
        PLANNER_CONFIG_DEFAULT,
        new NoopEscalator(),
        new BrokerInternalQueryConfig(),
        null
    ) {

        boolean throwException = true;

        @Override
        protected DruidTable buildDruidTable(String dataSource) {
            DruidTable table = super.buildDruidTable(dataSource);
            buildTableLatch.countDown();
            return table;
        }

        @Override
        protected Set<SegmentId> refreshSegments(final Set<SegmentId> segments) throws IOException {
            if (throwException) {
                throwException = false;
                throw new RuntimeException("Query[xxxx] url[http://xxxx:8083/druid/v2/] timed out.");
            } else {
                return super.refreshSegments(segments);
            }
        }

        @Override
        void markDataSourceAsNeedRebuild(String datasource) {
            super.markDataSourceAsNeedRebuild(datasource);
            markDataSourceLatch.countDown();
        }
    };
    schema.start();
    schema.awaitInitialization();
}
Also used : EnumSet(java.util.EnumSet) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) TestServerInventoryView(org.apache.druid.sql.calcite.util.TestServerInventoryView) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) IndexBuilder(org.apache.druid.segment.IndexBuilder) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) DruidTable(org.apache.druid.sql.calcite.table.DruidTable) DataSegment(org.apache.druid.timeline.DataSegment) NoopEscalator(org.apache.druid.server.security.NoopEscalator) MapJoinableFactory(org.apache.druid.segment.join.MapJoinableFactory) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) SegmentId(org.apache.druid.timeline.SegmentId) IOException(java.io.IOException) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) SpecificSegmentsQuerySegmentWalker(org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker) QueryableIndex(org.apache.druid.segment.QueryableIndex) BrokerInternalQueryConfig(org.apache.druid.client.BrokerInternalQueryConfig) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) File(java.io.File) Before(org.junit.Before)
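
The anonymous subclasses above override buildDruidTable and markDataSourceAsNeedRebuild only to count down latches, so the test thread can block until those callbacks have actually fired. A stripped-down sketch of that pattern in plain Java (names here are illustrative, not from DruidSchemaTest):

    import java.util.concurrent.CountDownLatch;
    import java.util.concurrent.TimeUnit;

    class LatchPatternSketch {
        static final CountDownLatch buildTableLatch = new CountDownLatch(1);

        static void buildTable() {
            // real work would happen here (stand-in for super.buildDruidTable)
            buildTableLatch.countDown(); // signal that the callback ran
        }

        public static void main(String[] args) throws InterruptedException {
            new Thread(LatchPatternSketch::buildTable).start();
            // block until the callback has run at least once, with a timeout
            if (!buildTableLatch.await(10, TimeUnit.SECONDS)) {
                throw new IllegalStateException("buildTable never ran");
            }
        }
    }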

Example 13 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From class SegmentManagerBroadcastJoinIndexedTableTest, method createSegment:

private DataSegment createSegment(IncrementalIndex data, String interval, String version) throws IOException {
    final DataSegment tmpSegment = new DataSegment(
        TABLE_NAME,
        Intervals.of(interval),
        version,
        Collections.emptyMap(),
        Collections.emptyList(),
        Collections.emptyList(),
        new NumberedShardSpec(0, 0),
        9,
        100
    );
    final String storageDir = DataSegmentPusher.getDefaultStorageDir(tmpSegment, false);
    final File segmentDir = new File(segmentDeepStorageDir, storageDir);
    FileUtils.mkdirp(segmentDir);
    IndexMerger indexMerger = new IndexMergerV9(objectMapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance());
    SegmentizerFactory factory = new BroadcastJoinableMMappedQueryableSegmentizerFactory(indexIO, KEY_COLUMNS);
    indexMerger.persist(data, Intervals.of(interval), segmentDir, new IndexSpec(null, null, null, null, factory), null);
    final File factoryJson = new File(segmentDir, "factory.json");
    objectMapper.writeValue(factoryJson, factory);
    return tmpSegment.withLoadSpec(ImmutableMap.of("type", "local", "path", segmentDir.getAbsolutePath()));
}
Also used : IndexMerger(org.apache.druid.segment.IndexMerger) IndexSpec(org.apache.druid.segment.IndexSpec) IndexMergerV9(org.apache.druid.segment.IndexMergerV9) BroadcastJoinableMMappedQueryableSegmentizerFactory(org.apache.druid.segment.loading.BroadcastJoinableMMappedQueryableSegmentizerFactory) SegmentizerFactory(org.apache.druid.segment.loading.SegmentizerFactory) DataSegment(org.apache.druid.timeline.DataSegment) File(java.io.File) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec)
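
The factory.json written above is what lets the segment loader pick the broadcast-joinable segmentizer when the segment is later pulled from deep storage. A hedged sketch of the read side, assuming the same objectMapper and that SegmentizerFactory resolves its concrete subtype from the serialized "type" field (an assumption about its Jackson annotations, not shown in this test):

    // Read the factory back from the file written by objectMapper.writeValue(factoryJson, factory).
    SegmentizerFactory loaded = objectMapper.readValue(
        new File(segmentDir, "factory.json"),
        SegmentizerFactory.class
    );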

Example 14 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From class IndexGeneratorJobTest, method verifyJob:

private void verifyJob(IndexGeneratorJob job) throws IOException {
    Assert.assertTrue(JobHelper.runJobs(ImmutableList.of(job)));
    final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
    List<DataSegmentAndIndexZipFilePath> dataSegmentAndIndexZipFilePaths = IndexGeneratorJob.getPublishedSegmentAndIndexZipFilePaths(config);
    dataSegmentAndIndexZipFilePaths.forEach(
        segmentAndIndexZipFilePath -> intervalToSegments
            .computeIfAbsent(segmentAndIndexZipFilePath.getSegment().getInterval(), k -> new ArrayList<>())
            .add(segmentAndIndexZipFilePath.getSegment())
    );
    JobHelper.renameIndexFilesForSegments(config.getSchema(), dataSegmentAndIndexZipFilePaths);
    JobHelper.maybeDeleteIntermediatePath(true, config.getSchema());
    File workingPath = new File(config.makeIntermediatePath().toUri().getPath());
    Assert.assertTrue(workingPath.exists());
    final Map<Interval, List<File>> intervalToIndexFiles = new HashMap<>();
    int segmentNum = 0;
    for (DateTime currTime = interval.getStart(); currTime.isBefore(interval.getEnd()); currTime = currTime.plusDays(1)) {
        Object[][] shardInfo = shardInfoForEachSegment[segmentNum++];
        File segmentOutputFolder = new File(StringUtils.format(
            "%s/%s/%s_%s/%s",
            config.getSchema().getIOConfig().getSegmentOutputPath(),
            config.getSchema().getDataSchema().getDataSource(),
            currTime.toString(),
            currTime.plusDays(1).toString(),
            config.getSchema().getTuningConfig().getVersion()
        ));
        Assert.assertTrue(segmentOutputFolder.exists());
        Assert.assertEquals(shardInfo.length, segmentOutputFolder.list().length);
        for (int partitionNum = 0; partitionNum < shardInfo.length; ++partitionNum) {
            File individualSegmentFolder = new File(segmentOutputFolder, Integer.toString(partitionNum));
            Assert.assertTrue(individualSegmentFolder.exists());
            File indexZip = new File(individualSegmentFolder, "index.zip");
            Assert.assertTrue(indexZip.exists());
            intervalToIndexFiles.computeIfAbsent(new Interval(currTime, currTime.plusDays(1)), k -> new ArrayList<>()).add(indexZip);
        }
    }
    Assert.assertEquals(intervalToSegments.size(), intervalToIndexFiles.size());
    segmentNum = 0;
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final Interval interval = entry.getKey();
        final List<DataSegment> segments = entry.getValue();
        final List<File> indexFiles = intervalToIndexFiles.get(interval);
        Assert.assertNotNull(indexFiles);
        Collections.sort(segments);
        indexFiles.sort(Comparator.comparing(File::getAbsolutePath));
        Assert.assertEquals(segments.size(), indexFiles.size());
        Object[][] shardInfo = shardInfoForEachSegment[segmentNum++];
        for (int i = 0; i < segments.size(); i++) {
            final DataSegment dataSegment = segments.get(i);
            final File indexZip = indexFiles.get(i);
            Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
            Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
            Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
            Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
            if ("website".equals(datasourceName)) {
                Assert.assertEquals("website", dataSegment.getDataSource());
                Assert.assertEquals("host", dataSegment.getDimensions().get(0));
                Assert.assertEquals("visited_num", dataSegment.getMetrics().get(0));
                Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
            } else if ("inherit_dims".equals(datasourceName)) {
                Assert.assertEquals("inherit_dims", dataSegment.getDataSource());
                Assert.assertEquals(ImmutableList.of("X", "Y", "M", "Q", "B", "F"), dataSegment.getDimensions());
                Assert.assertEquals("count", dataSegment.getMetrics().get(0));
            } else if ("inherit_dims2".equals(datasourceName)) {
                Assert.assertEquals("inherit_dims2", dataSegment.getDataSource());
                Assert.assertEquals(ImmutableList.of("B", "F", "M", "Q", "X", "Y"), dataSegment.getDimensions());
                Assert.assertEquals("count", dataSegment.getMetrics().get(0));
            } else {
                Assert.fail("Test did not specify supported datasource name");
            }
            if (forceExtendableShardSpecs) {
                NumberedShardSpec spec = (NumberedShardSpec) dataSegment.getShardSpec();
                Assert.assertEquals(i, spec.getPartitionNum());
                Assert.assertEquals(shardInfo.length, spec.getNumCorePartitions());
            } else if ("hashed".equals(partitionType)) {
                Integer[] hashShardInfo = (Integer[]) shardInfo[i];
                HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
                Assert.assertEquals((int) hashShardInfo[0], spec.getPartitionNum());
                Assert.assertEquals((int) hashShardInfo[1], spec.getNumCorePartitions());
            } else if ("single".equals(partitionType)) {
                String[] singleDimensionShardInfo = (String[]) shardInfo[i];
                SingleDimensionShardSpec spec = (SingleDimensionShardSpec) dataSegment.getShardSpec();
                Assert.assertEquals(singleDimensionShardInfo[0], spec.getStart());
                Assert.assertEquals(singleDimensionShardInfo[1], spec.getEnd());
            } else {
                throw new RE("Invalid partition type:[%s]", partitionType);
            }
        }
    }
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) SequenceFile(org.apache.hadoop.io.SequenceFile) ByteBuffer(java.nio.ByteBuffer) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MRJobConfig(org.apache.hadoop.mapreduce.MRJobConfig) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) NamedType(com.fasterxml.jackson.databind.jsontype.NamedType) Path(org.apache.hadoop.fs.Path) Parameterized(org.junit.runners.Parameterized) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) ImmutableMap(com.google.common.collect.ImmutableMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) StringUtils(org.apache.druid.java.util.common.StringUtils) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) DateTimeComparator(org.joda.time.DateTimeComparator) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) Intervals(org.apache.druid.java.util.common.Intervals) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) BytesWritable(org.apache.hadoop.io.BytesWritable) SequenceFileInputFormat(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) HashPartitionFunction(org.apache.druid.timeline.partition.HashPartitionFunction) Before(org.junit.Before) RE(org.apache.druid.java.util.common.RE) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) FileUtils(org.apache.commons.io.FileUtils) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) File(java.io.File) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Rule(org.junit.Rule) TreeMap(java.util.TreeMap) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) Writer(org.apache.hadoop.io.SequenceFile.Writer) Assert(org.junit.Assert) Comparator(java.util.Comparator) DataSchema(org.apache.druid.segment.indexing.DataSchema) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder)
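
The forceExtendableShardSpecs branch above encodes a simple invariant: with N segments published for an interval, segment i carries NumberedShardSpec(i, N). A minimal sketch of just that check, with a hypothetical segment count:

    int numSegments = 3; // hypothetical
    for (int i = 0; i < numSegments; i++) {
        NumberedShardSpec spec = new NumberedShardSpec(i, numSegments);
        Assert.assertEquals(i, spec.getPartitionNum());
        Assert.assertEquals(numSegments, spec.getNumCorePartitions());
    }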

Example 15 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From class DatasourceRecordReaderTest, method testSanity:

@Test
public void testSanity() throws Exception {
    final DataSegment segment = new DataSegment(
        "testds",
        Intervals.of("2014-10-22T00:00:00.000Z/2014-10-23T00:00:00.000Z"),
        "2015-07-15T22:02:40.171Z",
        ImmutableMap.of("type", "local", "path", this.getClass().getClassLoader().getResource("test-segment/index.zip").getPath()),
        ImmutableList.of("host"),
        ImmutableList.of("visited_sum", "unique_hosts"),
        new NumberedShardSpec(0, 1),
        9,
        4096
    );
    InputSplit split = new DatasourceInputSplit(Collections.singletonList(WindowedDataSegment.of(segment)), null);
    Configuration config = new Configuration();
    DatasourceInputFormat.addDataSource(
        config,
        new DatasourceIngestionSpec(segment.getDataSource(), segment.getInterval(), null, null, null, segment.getDimensions(), segment.getMetrics(), false, null),
        Collections.emptyList(),
        0
    );
    TaskAttemptContext context = EasyMock.createNiceMock(TaskAttemptContext.class);
    EasyMock.expect(context.getConfiguration()).andReturn(config).anyTimes();
    EasyMock.replay(context);
    DatasourceRecordReader rr = new DatasourceRecordReader();
    rr.initialize(split, context);
    Assert.assertEquals(0, rr.getProgress(), 0.0001);
    List<InputRow> rows = new ArrayList<>();
    while (rr.nextKeyValue()) {
        rows.add(rr.getCurrentValue());
    }
    verifyRows(rows);
    Assert.assertEquals(1, rr.getProgress(), 0.0001);
    rr.close();
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) InputRow(org.apache.druid.data.input.InputRow) ArrayList(java.util.ArrayList) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) DataSegment(org.apache.druid.timeline.DataSegment) InputSplit(org.apache.hadoop.mapreduce.InputSplit) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Test(org.junit.Test)

Aggregations

NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec): 58
Test (org.junit.Test): 45
DataSegment (org.apache.druid.timeline.DataSegment): 41
HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec): 26
ImmutableList (com.google.common.collect.ImmutableList): 24
List (java.util.List): 24
ArrayList (java.util.ArrayList): 23
Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder): 14
Interval (org.joda.time.Interval): 14
NumberedOverwriteShardSpec (org.apache.druid.timeline.partition.NumberedOverwriteShardSpec): 13
IOException (java.io.IOException): 12
File (java.io.File): 11
Map (java.util.Map): 11
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 10
HashMap (java.util.HashMap): 10
TaskStatus (org.apache.druid.indexer.TaskStatus): 9
Before (org.junit.Before): 9
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 8
NoopTask (org.apache.druid.indexing.common.task.NoopTask): 8
Task (org.apache.druid.indexing.common.task.Task): 8
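
Taken together, the examples pass (partitionNum, partitions) to the constructor. A summary sketch of the values seen above; the reading of a zero core-partition count as "dynamically appended" is an assumption about Druid's partitioning semantics, not something these tests assert:

    new NumberedShardSpec(0, 0); // partition 0; zero core partitions (assumed: dynamic append), Examples 11 and 13
    new NumberedShardSpec(0, 1); // the single partition of a one-partition interval, Example 15
    new NumberedShardSpec(2, 3); // partition 2 of a three-partition core set, Example 12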