
Example 21 with LinearShardSpec

Use of org.apache.druid.timeline.partition.LinearShardSpec in project druid by druid-io.

From the class ExpressionVectorSelectorBenchmark, the setup method:

@Setup(Level.Trial)
public void setup() {
    this.closer = Closer.create();
    final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench");
    final DataSegment dataSegment = DataSegment.builder()
            .dataSource("foo")
            .interval(schemaInfo.getDataInterval())
            .version("1")
            .shardSpec(new LinearShardSpec(0))
            .size(0)
            .build();
    final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
    this.index = closer.register(segmentGenerator.generate(dataSegment, schemaInfo, Granularities.HOUR, rowsPerSegment));
    Expr parsed = Parser.parse(expression, ExprMacroTable.nil());
    outputType = parsed.getOutputType(new ColumnInspector() {

        @Nullable
        @Override
        public ColumnCapabilities getColumnCapabilities(String column) {
            return QueryableIndexStorageAdapter.getColumnCapabilities(index, column);
        }
    });
    checkSanity();
}
Also used : SegmentGenerator(org.apache.druid.segment.generator.SegmentGenerator) Expr(org.apache.druid.math.expr.Expr) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) ColumnInspector(org.apache.druid.segment.ColumnInspector) DataSegment(org.apache.druid.timeline.DataSegment) Setup(org.openjdk.jmh.annotations.Setup)
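
For readers skimming the example above, the only role of LinearShardSpec here is to tag the generated benchmark segment as partition 0 of its interval. A minimal, self-contained sketch of that construction (the datasource name and interval endpoints below are illustrative, not taken from the benchmark):

import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;

public class LinearShardSpecSegmentSketch {
    // Build a single-partition segment descriptor; partition numbers only need
    // to be distinct within one (dataSource, interval, version) combination.
    public static DataSegment singlePartitionSegment() {
        return DataSegment.builder()
                .dataSource("example_datasource")                       // illustrative name
                .interval(new Interval(0, 3_600_000, DateTimeZone.UTC)) // illustrative one-hour interval
                .version("1")
                .shardSpec(new LinearShardSpec(0))                      // partition number 0
                .size(0)
                .build();
    }
}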

Example 22 with LinearShardSpec

Use of org.apache.druid.timeline.partition.LinearShardSpec in project hive by apache.

From the class TestDruidStorageHandler, the testCommitInsertIntoTable method:

@Test
public void testCommitInsertIntoTable() throws MetaException, IOException {
    DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
    MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
    druidStorageHandler.preCreateTable(tableMock);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
    List<DataSegment> existingSegments = Collections.singletonList(
            createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(),
                    new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(1)));
    HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
    pusherConfig.setStorageDirectory(config.get(String.valueOf(HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY)));
    DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
    DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME,
            existingSegments, true, config, dataSegmentPusher);
    DataSegment dataSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(),
            new Interval(100, 150, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
    Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment,
            new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
    DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment, descriptorPath);
    druidStorageHandler.commitInsertTable(tableMock, false);
    Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(),
            Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
    final List<DataSegment> dataSegmentList = getUsedSegmentsList(connector, metadataStorageTablesConfig);
    Assert.assertEquals(2, dataSegmentList.size());
    DataSegment persistedSegment = dataSegmentList.get(1);
    // Insert into appends to old version
    Assert.assertEquals("v0", persistedSegment.getVersion());
    Assert.assertTrue(persistedSegment.getShardSpec() instanceof LinearShardSpec);
    Assert.assertEquals(2, persistedSegment.getShardSpec().getPartitionNum());
    Path expectedFinalHadoopPath = new Path(dataSegmentPusher.getPathForHadoop(),
            dataSegmentPusher.makeIndexPathName(persistedSegment, DruidStorageHandlerUtils.INDEX_ZIP));
    Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", expectedFinalHadoopPath.toString()),
            persistedSegment.getLoadSpec());
    Assert.assertEquals("dummySegmentData", FileUtils.readFileToString(new File(expectedFinalHadoopPath.toUri())));
}
Also used : Path(org.apache.hadoop.fs.Path) MetadataStorageTablesConfig(org.apache.druid.metadata.MetadataStorageTablesConfig) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) HdfsDataSegmentPusher(org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) HdfsDataSegmentPusherConfig(org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig) DataSegment(org.apache.druid.timeline.DataSegment) HdfsDataSegmentPusher(org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) File(java.io.File) Interval(org.joda.time.Interval) Test(org.junit.Test)
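
The key assertion in this test is the partition number: because LinearShardSpec is extendable, inserting into an interval that already contains partition 1 is expected to append a new segment as partition 2 under the existing version. A tiny hedged sketch of that expectation (this allocation is an illustration, not the handler's actual code):

import org.apache.druid.timeline.partition.LinearShardSpec;

public class AppendPartitionSketch {
    // Illustrative allocation: an appended segment takes the next free partition
    // number within the interval, which is why the test expects partition 2.
    static LinearShardSpec nextPartition(int highestExistingPartition) {
        return new LinearShardSpec(highestExistingPartition + 1);
    }

    public static void main(String[] args) {
        System.out.println(nextPartition(1).getPartitionNum()); // prints 2
    }
}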

Example 23 with LinearShardSpec

Use of org.apache.druid.timeline.partition.LinearShardSpec in project hive by apache.

From the class TestDruidStorageHandler, the testCommitInsertIntoWithNonExtendableSegment method:

@Test(expected = MetaException.class)
public void testCommitInsertIntoWithNonExtendableSegment() throws MetaException, IOException {
    DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
    MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
    druidStorageHandler.preCreateTable(tableMock);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
    List<DataSegment> existingSegments = Arrays.asList(
            createSegment(new Path(taskDirPath, "index_old_1.zip").toString(),
                    new Interval(100, 150, DateTimeZone.UTC), "v0", NoneShardSpec.instance()),
            createSegment(new Path(taskDirPath, "index_old_2.zip").toString(),
                    new Interval(200, 250, DateTimeZone.UTC), "v0", new LinearShardSpec(0)),
            createSegment(new Path(taskDirPath, "index_old_3.zip").toString(),
                    new Interval(250, 300, DateTimeZone.UTC), "v0", new LinearShardSpec(0)));
    HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
    pusherConfig.setStorageDirectory(taskDirPath.toString());
    DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
    DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME,
            existingSegments, true, config, dataSegmentPusher);
    // Try appending to non extendable shard spec
    DataSegment conflictingSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(),
            new Interval(100, 150, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
    Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(conflictingSegment,
            new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
    DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, conflictingSegment, descriptorPath);
    druidStorageHandler.commitInsertTable(tableMock, false);
}
Also used : Path(org.apache.hadoop.fs.Path) MetadataStorageTablesConfig(org.apache.druid.metadata.MetadataStorageTablesConfig) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) HdfsDataSegmentPusher(org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) HdfsDataSegmentPusherConfig(org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig) DataSegment(org.apache.druid.timeline.DataSegment) HdfsDataSegmentPusher(org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) Interval(org.joda.time.Interval) Test(org.junit.Test)
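
The expected MetaException comes from the interval [100, 150) already holding a NoneShardSpec segment, which cannot be extended with further partitions, unlike the LinearShardSpec segments in the other intervals. A hedged illustration of that distinction (the class and helper names are hypothetical; the real handler performs the check through Druid's shard-spec machinery rather than a plain instanceof test):

import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;

public class ExtendabilitySketch {
    // Hypothetical illustration only: LinearShardSpec-backed intervals accept
    // additional partitions, while NoneShardSpec-backed intervals do not.
    static boolean canAppendTo(DataSegment existing) {
        return existing.getShardSpec() instanceof LinearShardSpec;
    }
}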

Example 24 with LinearShardSpec

Use of org.apache.druid.timeline.partition.LinearShardSpec in project hive by apache.

From the class DruidRecordWriter, the write method:

@Override
public void write(Writable w) throws IOException {
    DruidWritable record = (DruidWritable) w;
    final long timestamp = (long) record.getValue().get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN);
    final int partitionNumber = Math.toIntExact((long) record.getValue().getOrDefault(Constants.DRUID_SHARD_KEY_COL_NAME, -1L));
    final InputRow inputRow = new MapBasedInputRow(timestamp,
            dataSchema.getParser().getParseSpec().getDimensionsSpec().getDimensionNames(), record.getValue());
    try {
        if (partitionNumber != -1 && maxPartitionSize == -1) {
            /*
         Case: data is sorted by time plus an extra hashing dimension (see DRUID_SHARD_KEY_COL_NAME).
         Use DRUID_SHARD_KEY_COL_NAME as the segment partition in addition to the time dimension,
         so rows with the same DRUID_SHARD_KEY_COL_NAME and time interval end up in the same segment.
         */
            DateTime truncatedDateTime = segmentGranularity.bucketStart(DateTimes.utc(timestamp));
            final Interval interval = new Interval(truncatedDateTime, segmentGranularity.increment(truncatedDateTime));
            if (currentOpenSegment != null) {
                if (currentOpenSegment.getShardSpec().getPartitionNum() != partitionNumber
                        || !currentOpenSegment.getInterval().equals(interval)) {
                    pushSegments(ImmutableList.of(currentOpenSegment));
                    currentOpenSegment = new SegmentIdWithShardSpec(dataSchema.getDataSource(), interval,
                            tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(partitionNumber));
                }
            } else {
                currentOpenSegment = new SegmentIdWithShardSpec(dataSchema.getDataSource(), interval,
                        tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(partitionNumber));
            }
            appenderator.add(currentOpenSegment, inputRow, committerSupplier::get);
        } else if (partitionNumber == -1 && maxPartitionSize != -1) {
            /* Case: partitioning segments based on time and a maximum row count per segment (maxPartitionSize). */
            appenderator.add(getSegmentIdentifierAndMaybePush(timestamp), inputRow, committerSupplier::get);
        } else {
            throw new IllegalArgumentException(String.format(
                    "partitionNumber and maxPartitionSize should be mutually exclusive "
                            + "got partitionNum [%s] and maxPartitionSize [%s]", partitionNumber, maxPartitionSize));
        }
    } catch (SegmentNotWritableException e) {
        throw new IOException(e);
    }
}
Also used : SegmentNotWritableException(org.apache.druid.segment.realtime.appenderator.SegmentNotWritableException) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) IOException(java.io.IOException) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) DateTime(org.joda.time.DateTime) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) Interval(org.joda.time.Interval)
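
Per row, the routing above reduces to bucketing the timestamp into a segment-granularity interval and naming the target segment by (dataSource, interval, version, partition). A condensed, hedged sketch of just that construction (the datasource name and version string are placeholders; the real writer asks the tuning config's versioning policy for the version):

import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.joda.time.DateTime;
import org.joda.time.Interval;

public class SegmentRoutingSketch {
    // Bucket a row timestamp into an hourly interval and build the segment identifier.
    static SegmentIdWithShardSpec segmentIdFor(long timestampMillis, int partitionNumber) {
        Granularity granularity = Granularities.HOUR;
        DateTime start = granularity.bucketStart(DateTimes.utc(timestampMillis));
        Interval interval = new Interval(start, granularity.increment(start));
        return new SegmentIdWithShardSpec(
                "example_datasource",   // placeholder datasource
                interval,
                "placeholder_version",  // real code: tuningConfig.getVersioningPolicy().getVersion(interval)
                new LinearShardSpec(partitionNumber));
    }
}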

Example 25 with LinearShardSpec

Use of org.apache.druid.timeline.partition.LinearShardSpec in project hive by apache.

From the class DruidRecordWriter, the getSegmentIdentifierAndMaybePush method:

/**
 * Computes the segment identifier and may push the current open segment.
 * The push occurs if the maximum size is reached or the event belongs to the next interval.
 * Note that this function assumes timestamps are pseudo-sorted (sorting is done by the previous stage);
 * it closes the current segment and moves to the next granularity bucket as soon as
 * an event from the next interval appears.
 *
 * @return the segment identifier for the truncated time; may also push the current open segment.
 */
private SegmentIdWithShardSpec getSegmentIdentifierAndMaybePush(long truncatedTime) {
    DateTime truncatedDateTime = segmentGranularity.bucketStart(DateTimes.utc(truncatedTime));
    final Interval interval = new Interval(truncatedDateTime, segmentGranularity.increment(truncatedDateTime));
    SegmentIdWithShardSpec retVal;
    if (currentOpenSegment == null) {
        currentOpenSegment = new SegmentIdWithShardSpec(dataSchema.getDataSource(), interval,
                tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(0));
        return currentOpenSegment;
    } else if (currentOpenSegment.getInterval().equals(interval)) {
        retVal = currentOpenSegment;
        int rowCount = appenderator.getRowCount(retVal);
        if (rowCount < maxPartitionSize) {
            return retVal;
        } else {
            retVal = new SegmentIdWithShardSpec(dataSchema.getDataSource(), interval,
                    tuningConfig.getVersioningPolicy().getVersion(interval),
                    new LinearShardSpec(currentOpenSegment.getShardSpec().getPartitionNum() + 1));
            pushSegments(Lists.newArrayList(currentOpenSegment));
            LOG.info("Creating new partition for segment {}, partition num {}", retVal.toString(), retVal.getShardSpec().getPartitionNum());
            currentOpenSegment = retVal;
            return retVal;
        }
    } else {
        retVal = new SegmentIdWithShardSpec(dataSchema.getDataSource(), interval,
                tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(0));
        pushSegments(Lists.newArrayList(currentOpenSegment));
        LOG.info("Creating segment {}", retVal.toString());
        currentOpenSegment = retVal;
        return retVal;
    }
}
Also used : LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) DateTime(org.joda.time.DateTime) Interval(org.joda.time.Interval)
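
In short, the method keeps the partition number within the open interval, rolls to the next partition once maxPartitionSize rows have accumulated, and resets to partition 0 when a new interval begins. A minimal hedged sketch of that decision rule, with the appenderator's row count passed in as a plain argument (class and parameter names are illustrative):

import org.apache.druid.timeline.partition.LinearShardSpec;

public class PartitionRolloverSketch {
    // Sketch of the rollover rule implemented in the method above.
    static LinearShardSpec nextShardSpec(boolean sameInterval, int currentPartition,
                                         int rowCount, int maxPartitionSize) {
        if (!sameInterval) {
            return new LinearShardSpec(0);                   // new interval: start again at partition 0
        }
        return rowCount < maxPartitionSize
                ? new LinearShardSpec(currentPartition)      // keep filling the current partition
                : new LinearShardSpec(currentPartition + 1); // full: roll to the next partition
    }
}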

Aggregations

LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec) 42
DataSegment (org.apache.druid.timeline.DataSegment) 30
Test (org.junit.Test) 18
QueryableIndex (org.apache.druid.segment.QueryableIndex) 14
Interval (org.joda.time.Interval) 14
GeneratorSchemaInfo (org.apache.druid.segment.generator.GeneratorSchemaInfo) 12
SegmentGenerator (org.apache.druid.segment.generator.SegmentGenerator) 12
SpecificSegmentsQuerySegmentWalker (org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker) 12
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory) 11
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) 9
Setup (org.openjdk.jmh.annotations.Setup) 9
SegmentIdWithShardSpec (org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) 8
MetadataStorageTablesConfig (org.apache.druid.metadata.MetadataStorageTablesConfig) 7
IndexBuilder (org.apache.druid.segment.IndexBuilder) 7
DataSegmentPusher (org.apache.druid.segment.loading.DataSegmentPusher) 7
HdfsDataSegmentPusher (org.apache.druid.storage.hdfs.HdfsDataSegmentPusher) 7
HdfsDataSegmentPusherConfig (org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig) 7
LocalFileSystem (org.apache.hadoop.fs.LocalFileSystem) 7
Path (org.apache.hadoop.fs.Path) 7
File (java.io.File) 6