
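LinearShardSpec identifies a segment's partition within an interval by a single partition number, which makes it a convenient default in tests and benchmarks that only need one partition per interval. Before the examples, here is a minimal, hypothetical sketch of the pattern they all share: attaching a LinearShardSpec to a DataSegment through the builder. The data source name and interval below are illustrative; only the builder calls that appear in the examples are assumed.

import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;

// Sketch only: a single-partition segment (partition number 0) described by a LinearShardSpec.
final DataSegment segment = DataSegment.builder()
                                       .dataSource("example")
                                       .interval(Intervals.of("2000-01-01/2001-01-01"))
                                       .version("1")
                                       .shardSpec(new LinearShardSpec(0))
                                       .size(0)
                                       .build();
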
Example 1 with LinearShardSpec

Use of org.apache.druid.timeline.partition.LinearShardSpec in project druid by druid-io.

From class SqlVectorizedExpressionSanityTest, method setupClass().

@BeforeClass
public static void setupClass() {
    Calcites.setSystemProperties();
    ExpressionProcessing.initializeForStrictBooleansTests(true);
    CLOSER = Closer.create();
    final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench");
    final DataSegment dataSegment = DataSegment.builder()
                                               .dataSource("foo")
                                               .interval(schemaInfo.getDataInterval())
                                               .version("1")
                                               .shardSpec(new LinearShardSpec(0))
                                               .size(0)
                                               .build();
    final SegmentGenerator segmentGenerator = CLOSER.register(new SegmentGenerator());
    INDEX = CLOSER.register(segmentGenerator.generate(dataSegment, schemaInfo, Granularities.HOUR, ROWS_PER_SEGMENT));
    CONGLOMERATE = QueryStackTests.createQueryRunnerFactoryConglomerate(CLOSER);
    WALKER = new SpecificSegmentsQuerySegmentWalker(CONGLOMERATE).add(dataSegment, INDEX);
    CLOSER.register(WALKER);
    final PlannerConfig plannerConfig = new PlannerConfig();
    final DruidSchemaCatalog rootSchema = CalciteTests.createMockRootSchema(
        CONGLOMERATE,
        WALKER,
        plannerConfig,
        AuthTestUtils.TEST_AUTHORIZER_MAPPER
    );
    PLANNER_FACTORY = new PlannerFactory(
        rootSchema,
        CalciteTests.createMockQueryMakerFactory(WALKER, CONGLOMERATE),
        CalciteTests.createOperatorTable(),
        CalciteTests.createExprMacroTable(),
        plannerConfig,
        AuthTestUtils.TEST_AUTHORIZER_MAPPER,
        CalciteTests.getJsonMapper(),
        CalciteTests.DRUID_SCHEMA_NAME
    );
}
Also used : SegmentGenerator(org.apache.druid.segment.generator.SegmentGenerator) SpecificSegmentsQuerySegmentWalker(org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) PlannerConfig(org.apache.druid.sql.calcite.planner.PlannerConfig) DruidSchemaCatalog(org.apache.druid.sql.calcite.schema.DruidSchemaCatalog) PlannerFactory(org.apache.druid.sql.calcite.planner.PlannerFactory) DataSegment(org.apache.druid.timeline.DataSegment) BeforeClass(org.junit.BeforeClass)

Example 2 with LinearShardSpec

Use of org.apache.druid.timeline.partition.LinearShardSpec in project druid by druid-io.

From class IndexerSQLMetadataStorageCoordinatorTest, method testAllocatePendingSegmentAfterDroppingExistingSegment().

/**
 * This test simulates an issue detected in the field, consisting of the following sequence of events:
 * - A kafka stream segment was created on a given interval
 * - Later, after the above was published, another segment on same interval was created by the stream
 * - Later, after the above was published, another segment on same interval was created by the stream
 * - Later a compaction was issued for the three segments above
 * - Later, after the above was published, another segment on same interval was created by the stream
 * - Later, the compacted segment got dropped due to a drop rule
 * - Later, after the above was dropped, another segment on same interval was created by the stream but this
 *   time there was an integrity violation in the pending segments table because the
 *   {@link IndexerSQLMetadataStorageCoordinator#createNewSegment(Handle, String, Interval, PartialShardSpec, String)}
 *   method returned a segment id that already existed in the pending segments table
 */
@Test
public void testAllocatePendingSegmentAfterDroppingExistingSegment() {
    String maxVersion = "version_newer_newer";
    // simulate one load using kafka streaming
    final PartialShardSpec partialShardSpec = NumberedPartialShardSpec.instance();
    final String dataSource = "ds";
    final Interval interval = Intervals.of("2017-01-01/2017-02-01");
    final SegmentIdWithShardSpec identifier = coordinator.allocatePendingSegment(
        dataSource, "seq", null, interval, partialShardSpec, "version", true
    );
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version", identifier.toString());
    // simulate one more load using kafka streaming (as if previous segment was published, note different sequence name)
    final SegmentIdWithShardSpec identifier1 = coordinator.allocatePendingSegment(
        dataSource, "seq2", identifier.toString(), interval, partialShardSpec, maxVersion, true
    );
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_1", identifier1.toString());
    // simulate one more load using kafka streaming (as if previous segment was published, note different sequence name)
    final SegmentIdWithShardSpec identifier2 = coordinator.allocatePendingSegment(
        dataSource, "seq3", identifier1.toString(), interval, partialShardSpec, maxVersion, true
    );
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_2", identifier2.toString());
    // now simulate a compaction (batch ingestion) over the same interval (like a reindex of the previous three segments):
    DataSegment segment = new DataSegment(
        "ds",
        Intervals.of("2017-01-01T00Z/2017-02-01T00Z"),
        "version_new",
        ImmutableMap.of(),
        ImmutableList.of("dim1"),
        ImmutableList.of("m1"),
        new LinearShardSpec(0),
        9,
        100
    );
    Assert.assertTrue(insertUsedSegments(ImmutableSet.of(segment)));
    List<String> ids = retrieveUsedSegmentIds();
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_new", ids.get(0));
    // one more load on same interval:
    final SegmentIdWithShardSpec identifier3 = coordinator.allocatePendingSegment(
        dataSource, "seq4", identifier1.toString(), interval, partialShardSpec, maxVersion, true
    );
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_new_1", identifier3.toString());
    // now drop the used segment previously loaded:
    markAllSegmentsUnused(ImmutableSet.of(segment));
    // and a final load; this reproduces an issue that could happen with multiple streaming appends,
    // followed by a reindex, followed by a drop, and more streaming data coming in for the same interval
    final SegmentIdWithShardSpec identifier4 = coordinator.allocatePendingSegment(
        dataSource, "seq5", identifier1.toString(), interval, partialShardSpec, maxVersion, true
    );
    Assert.assertEquals("ds_2017-01-01T00:00:00.000Z_2017-02-01T00:00:00.000Z_version_new_2", identifier4.toString());
}
Also used : LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) HashBasedNumberedPartialShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedPartialShardSpec) PartialShardSpec(org.apache.druid.timeline.partition.PartialShardSpec) NumberedPartialShardSpec(org.apache.druid.timeline.partition.NumberedPartialShardSpec) NumberedOverwritePartialShardSpec(org.apache.druid.timeline.partition.NumberedOverwritePartialShardSpec) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) Interval(org.joda.time.Interval) Test(org.junit.Test)
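
Note how the asserted segment ids encode the allocation behavior: ids take the form dataSource_intervalStart_intervalEnd_version, with a numeric suffix (_1, _2, and so on) appended for additional partitions allocated on the same interval and version. Once the compacted segment with version version_new is inserted as a used segment, later allocations take that version as the base, which is exactly the sequence this test exercises.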

Example 3 with LinearShardSpec

Use of org.apache.druid.timeline.partition.LinearShardSpec in project druid by druid-io.

From class IndexerSQLMetadataStorageCoordinatorTest, method testAnnounceHistoricalSegments().

@Test
public void testAnnounceHistoricalSegments() throws IOException {
    Set<DataSegment> segments = new HashSet<>();
    for (int i = 0; i < 105; i++) {
        segments.add(
            new DataSegment(
                "fooDataSource",
                Intervals.of("2015-01-01T00Z/2015-01-02T00Z"),
                "version",
                ImmutableMap.of(),
                ImmutableList.of("dim1"),
                ImmutableList.of("m1"),
                new LinearShardSpec(i),
                9,
                100
            )
        );
    }
    coordinator.announceHistoricalSegments(segments);
    for (DataSegment segment : segments) {
        Assert.assertArrayEquals(
            mapper.writeValueAsString(segment).getBytes(StandardCharsets.UTF_8),
            derbyConnector.lookup(
                derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable(),
                "id",
                "payload",
                segment.getId().toString()
            )
        );
    }
    List<String> segmentIds = segments.stream().map(segment -> segment.getId().toString()).collect(Collectors.toList());
    segmentIds.sort(Comparator.naturalOrder());
    Assert.assertEquals(segmentIds, retrieveUsedSegmentIds());
    // Should not update dataSource metadata.
    Assert.assertEquals(0, metadataUpdateCounter.get());
}
Also used : Arrays(java.util.Arrays) DimensionRangeShardSpec(org.apache.druid.timeline.partition.DimensionRangeShardSpec) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) Assertions(org.assertj.core.api.Assertions) PreparedBatch(org.skife.jdbi.v2.PreparedBatch) HashBasedNumberedPartialShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedPartialShardSpec) DateTimes(org.apache.druid.java.util.common.DateTimes) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) SegmentPublishResult(org.apache.druid.indexing.overlord.SegmentPublishResult) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) Segments(org.apache.druid.indexing.overlord.Segments) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) StringMapper(org.skife.jdbi.v2.util.StringMapper) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) List(java.util.List) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) PartitionIds(org.apache.druid.timeline.partition.PartitionIds) DataSegment(org.apache.druid.timeline.DataSegment) PartialShardSpec(org.apache.druid.timeline.partition.PartialShardSpec) StringTuple(org.apache.druid.data.input.StringTuple) NumberedPartialShardSpec(org.apache.druid.timeline.partition.NumberedPartialShardSpec) Iterables(com.google.common.collect.Iterables) Intervals(org.apache.druid.java.util.common.Intervals) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) ImmutableList(com.google.common.collect.ImmutableList) ObjectMetadata(org.apache.druid.indexing.overlord.ObjectMetadata) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) ExpectedException(org.junit.rules.ExpectedException) Before(org.junit.Before) NumberedOverwritePartialShardSpec(org.apache.druid.timeline.partition.NumberedOverwritePartialShardSpec) HandleCallback(org.skife.jdbi.v2.tweak.HandleCallback) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) Test(org.junit.Test) IOException(java.io.IOException) NoneShardSpec(org.apache.druid.timeline.partition.NoneShardSpec) AtomicLong(java.util.concurrent.atomic.AtomicLong) TestHelper(org.apache.druid.segment.TestHelper) Rule(org.junit.Rule) Handle(org.skife.jdbi.v2.Handle) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) Assert(org.junit.Assert) Comparator(java.util.Comparator) Collections(java.util.Collections) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) HashSet(java.util.HashSet) Test(org.junit.Test)
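
All 105 segments here share the same data source, interval, and version and differ only in their LinearShardSpec partition number. The test then checks that each serialized payload round-trips through the metadata store byte-for-byte and that retrieveUsedSegmentIds() returns the full id set in natural order, without touching the dataSource metadata counter.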

Example 4 with LinearShardSpec

Use of org.apache.druid.timeline.partition.LinearShardSpec in project druid by druid-io.

From class DruidSchemaTest, method setUp().

@Before
public void setUp() throws Exception {
    final File tmpDir = temporaryFolder.newFolder();
    final QueryableIndex index1 = IndexBuilder
        .create()
        .tmpDir(new File(tmpDir, "1"))
        .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
        .schema(
            new IncrementalIndexSchema.Builder()
                .withMetrics(
                    new CountAggregatorFactory("cnt"),
                    new DoubleSumAggregatorFactory("m1", "m1"),
                    new HyperUniquesAggregatorFactory("unique_dim1", "dim1")
                )
                .withRollup(false)
                .build()
        )
        .rows(ROWS1)
        .buildMMappedIndex();
    final QueryableIndex index2 = IndexBuilder
        .create()
        .tmpDir(new File(tmpDir, "2"))
        .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
        .schema(
            new IncrementalIndexSchema.Builder()
                .withMetrics(new LongSumAggregatorFactory("m1", "m1"))
                .withRollup(false)
                .build()
        )
        .rows(ROWS2)
        .buildMMappedIndex();
    walker = new SpecificSegmentsQuerySegmentWalker(conglomerate)
        .add(
            DataSegment.builder()
                       .dataSource(CalciteTests.DATASOURCE1)
                       .interval(Intervals.of("2000/P1Y"))
                       .version("1")
                       .shardSpec(new LinearShardSpec(0))
                       .size(0)
                       .build(),
            index1
        )
        .add(
            DataSegment.builder()
                       .dataSource(CalciteTests.DATASOURCE1)
                       .interval(Intervals.of("2001/P1Y"))
                       .version("1")
                       .shardSpec(new LinearShardSpec(0))
                       .size(0)
                       .build(),
            index2
        )
        .add(
            DataSegment.builder()
                       .dataSource(CalciteTests.DATASOURCE2)
                       .interval(index2.getDataInterval())
                       .version("1")
                       .shardSpec(new LinearShardSpec(0))
                       .size(0)
                       .build(),
            index2
        );
    final DataSegment segment1 = new DataSegment(
        "foo3",
        Intervals.of("2012/2013"),
        "version3",
        null,
        ImmutableList.of("dim1", "dim2"),
        ImmutableList.of("met1", "met2"),
        new NumberedShardSpec(2, 3),
        null,
        1,
        100L,
        PruneSpecsHolder.DEFAULT
    );
    final List<DataSegment> realtimeSegments = ImmutableList.of(segment1);
    serverView = new TestServerInventoryView(walker.getSegments(), realtimeSegments);
    druidServers = serverView.getDruidServers();
    schema = new DruidSchema(
        CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate),
        serverView,
        segmentManager,
        new MapJoinableFactory(
            ImmutableSet.of(globalTableJoinable),
            ImmutableMap.of(globalTableJoinable.getClass(), GlobalTableDataSource.class)
        ),
        PLANNER_CONFIG_DEFAULT,
        new NoopEscalator(),
        new BrokerInternalQueryConfig(),
        null
    ) {

        @Override
        protected DruidTable buildDruidTable(String dataSource) {
            DruidTable table = super.buildDruidTable(dataSource);
            buildTableLatch.countDown();
            return table;
        }

        @Override
        void markDataSourceAsNeedRebuild(String datasource) {
            super.markDataSourceAsNeedRebuild(datasource);
            markDataSourceLatch.countDown();
        }
    };
    schema2 = new DruidSchema(
        CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate),
        serverView,
        segmentManager,
        new MapJoinableFactory(
            ImmutableSet.of(globalTableJoinable),
            ImmutableMap.of(globalTableJoinable.getClass(), GlobalTableDataSource.class)
        ),
        PLANNER_CONFIG_DEFAULT,
        new NoopEscalator(),
        new BrokerInternalQueryConfig(),
        null
    ) {

        boolean throwException = true;

        @Override
        protected DruidTable buildDruidTable(String dataSource) {
            DruidTable table = super.buildDruidTable(dataSource);
            buildTableLatch.countDown();
            return table;
        }

        @Override
        protected Set<SegmentId> refreshSegments(final Set<SegmentId> segments) throws IOException {
            if (throwException) {
                throwException = false;
                throw new RuntimeException("Query[xxxx] url[http://xxxx:8083/druid/v2/] timed out.");
            } else {
                return super.refreshSegments(segments);
            }
        }

        @Override
        void markDataSourceAsNeedRebuild(String datasource) {
            super.markDataSourceAsNeedRebuild(datasource);
            markDataSourceLatch.countDown();
        }
    };
    schema.start();
    schema.awaitInitialization();
}
Also used : EnumSet(java.util.EnumSet) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) TestServerInventoryView(org.apache.druid.sql.calcite.util.TestServerInventoryView) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) IndexBuilder(org.apache.druid.segment.IndexBuilder) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) DruidTable(org.apache.druid.sql.calcite.table.DruidTable) DataSegment(org.apache.druid.timeline.DataSegment) NoopEscalator(org.apache.druid.server.security.NoopEscalator) MapJoinableFactory(org.apache.druid.segment.join.MapJoinableFactory) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) SegmentId(org.apache.druid.timeline.SegmentId) IOException(java.io.IOException) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) SpecificSegmentsQuerySegmentWalker(org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker) QueryableIndex(org.apache.druid.segment.QueryableIndex) BrokerInternalQueryConfig(org.apache.druid.client.BrokerInternalQueryConfig) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) File(java.io.File) Before(org.junit.Before)
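
This setup builds two DruidSchema instances over the same segment walker. The first (schema) only counts down latches when a table is built or a data source is marked for rebuild; the second (schema2) additionally throws once from refreshSegments to simulate a transient query timeout during metadata refresh, then delegates to the real implementation on the retry.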

Example 5 with LinearShardSpec

Use of org.apache.druid.timeline.partition.LinearShardSpec in project druid by druid-io.

From class ExpressionVectorSelectorsTest, method setupClass().

@BeforeClass
public static void setupClass() {
    CLOSER = Closer.create();
    final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench");
    final DataSegment dataSegment = DataSegment.builder()
                                               .dataSource("foo")
                                               .interval(schemaInfo.getDataInterval())
                                               .version("1")
                                               .shardSpec(new LinearShardSpec(0))
                                               .size(0)
                                               .build();
    final SegmentGenerator segmentGenerator = CLOSER.register(new SegmentGenerator());
    INDEX = CLOSER.register(segmentGenerator.generate(dataSegment, schemaInfo, Granularities.HOUR, ROWS_PER_SEGMENT));
}
Also used : SegmentGenerator(org.apache.druid.segment.generator.SegmentGenerator) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) DataSegment(org.apache.druid.timeline.DataSegment) BeforeClass(org.junit.BeforeClass)
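
Examples 1 and 5 share the same SegmentGenerator-based setup: a single-partition segment described by a LinearShardSpec is generated from the expression-testbench schema at hourly granularity. Example 1 additionally registers the generated index with a SpecificSegmentsQuerySegmentWalker and builds a PlannerFactory so the segment can be queried through Calcite SQL.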

Aggregations

LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec): 42 usages
DataSegment (org.apache.druid.timeline.DataSegment): 30 usages
Test (org.junit.Test): 18 usages
QueryableIndex (org.apache.druid.segment.QueryableIndex): 14 usages
Interval (org.joda.time.Interval): 14 usages
GeneratorSchemaInfo (org.apache.druid.segment.generator.GeneratorSchemaInfo): 12 usages
SegmentGenerator (org.apache.druid.segment.generator.SegmentGenerator): 12 usages
SpecificSegmentsQuerySegmentWalker (org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker): 12 usages
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 11 usages
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory): 9 usages
Setup (org.openjdk.jmh.annotations.Setup): 9 usages
SegmentIdWithShardSpec (org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec): 8 usages
MetadataStorageTablesConfig (org.apache.druid.metadata.MetadataStorageTablesConfig): 7 usages
IndexBuilder (org.apache.druid.segment.IndexBuilder): 7 usages
DataSegmentPusher (org.apache.druid.segment.loading.DataSegmentPusher): 7 usages
HdfsDataSegmentPusher (org.apache.druid.storage.hdfs.HdfsDataSegmentPusher): 7 usages
HdfsDataSegmentPusherConfig (org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig): 7 usages
LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem): 7 usages
Path (org.apache.hadoop.fs.Path): 7 usages
File (java.io.File): 6 usages