use of org.apache.druid.timeline.partition.LinearShardSpec in project druid by druid-io.
the class ExpressionVectorSelectorBenchmark method setup.
@Setup(Level.Trial)
public void setup() {
  this.closer = Closer.create();
  final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench");
  final DataSegment dataSegment = DataSegment.builder()
      .dataSource("foo")
      .interval(schemaInfo.getDataInterval())
      .version("1")
      .shardSpec(new LinearShardSpec(0))
      .size(0)
      .build();
  final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
  this.index = closer.register(segmentGenerator.generate(dataSegment, schemaInfo, Granularities.HOUR, rowsPerSegment));
  Expr parsed = Parser.parse(expression, ExprMacroTable.nil());
  outputType = parsed.getOutputType(new ColumnInspector() {
    @Nullable
    @Override
    public ColumnCapabilities getColumnCapabilities(String column) {
      return QueryableIndexStorageAdapter.getColumnCapabilities(index, column);
    }
  });
  checkSanity();
}
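For context, LinearShardSpec is the simplest numbered shard spec: it only carries a partition number, so several segments can share the same interval and version and differ only by that number. A minimal, self-contained sketch of that idea, assuming Druid's core classes are on the classpath (the data source name "foo" and the interval are illustrative, not taken from the benchmark; Intervals is org.apache.druid.java.util.common.Intervals):

import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.joda.time.Interval;

public class LinearShardSpecSketch {
  public static void main(String[] args) {
    Interval interval = Intervals.of("2020-01-01/2020-01-02");
    // Two shards of the same interval and version, distinguished only by partitionNum.
    DataSegment shard0 = DataSegment.builder()
        .dataSource("foo")
        .interval(interval)
        .version("1")
        .shardSpec(new LinearShardSpec(0))
        .size(0)
        .build();
    DataSegment shard1 = DataSegment.builder()
        .dataSource("foo")
        .interval(interval)
        .version("1")
        .shardSpec(new LinearShardSpec(1))
        .size(0)
        .build();
    System.out.println(shard0.getShardSpec().getPartitionNum()); // 0
    System.out.println(shard1.getShardSpec().getPartitionNum()); // 1
  }
}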
use of org.apache.druid.timeline.partition.LinearShardSpec in project hive by apache.
the class TestDruidStorageHandler method testCommitInsertIntoTable.
@Test
public void testCommitInsertIntoTable() throws MetaException, IOException {
  DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
  MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
  druidStorageHandler.preCreateTable(tableMock);
  LocalFileSystem localFileSystem = FileSystem.getLocal(config);
  Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
  List<DataSegment> existingSegments = Collections.singletonList(
      createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(),
          new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(1)));
  HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
  pusherConfig.setStorageDirectory(config.get(String.valueOf(HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY)));
  DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
  DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME,
      existingSegments, true, config, dataSegmentPusher);
  DataSegment dataSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(),
      new Interval(100, 150, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
  Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment,
      new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
  DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment, descriptorPath);
  druidStorageHandler.commitInsertTable(tableMock, false);
  Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(),
      Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
  final List<DataSegment> dataSegmentList = getUsedSegmentsList(connector, metadataStorageTablesConfig);
  Assert.assertEquals(2, dataSegmentList.size());
  DataSegment persistedSegment = dataSegmentList.get(1);
  // INSERT INTO appends to the existing version (v0) and takes the next linear partition number.
  Assert.assertEquals("v0", persistedSegment.getVersion());
  Assert.assertTrue(persistedSegment.getShardSpec() instanceof LinearShardSpec);
  Assert.assertEquals(2, persistedSegment.getShardSpec().getPartitionNum());
  Path expectedFinalHadoopPath = new Path(dataSegmentPusher.getPathForHadoop(),
      dataSegmentPusher.makeIndexPathName(persistedSegment, DruidStorageHandlerUtils.INDEX_ZIP));
  Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", expectedFinalHadoopPath.toString()),
      persistedSegment.getLoadSpec());
  Assert.assertEquals("dummySegmentData", FileUtils.readFileToString(new File(expectedFinalHadoopPath.toUri())));
}
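The append behavior asserted above comes down to allocating the next partition number within the same interval and version. A minimal sketch of that idea (the helper nextLinearShard is illustrative and not part of the Hive or Druid API; the real allocation happens inside the storage handler commit path):

import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;

public final class AppendSketch {
  private AppendSketch() {}

  /**
   * Given the highest existing segment of an interval, build the shard spec for an
   * appended segment: same interval and version, partition number incremented by one.
   */
  public static LinearShardSpec nextLinearShard(DataSegment highestExisting) {
    int nextPartition = highestExisting.getShardSpec().getPartitionNum() + 1;
    return new LinearShardSpec(nextPartition);
  }
}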
use of org.apache.druid.timeline.partition.LinearShardSpec in project hive by apache.
the class TestDruidStorageHandler method testCommitInsertIntoWithNonExtendableSegment.
@Test(expected = MetaException.class)
public void testCommitInsertIntoWithNonExtendableSegment() throws MetaException, IOException {
  DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
  MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
  druidStorageHandler.preCreateTable(tableMock);
  LocalFileSystem localFileSystem = FileSystem.getLocal(config);
  Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
  // The first interval is covered by a NoneShardSpec segment, which cannot be extended with additional partitions.
  List<DataSegment> existingSegments = Arrays.asList(
      createSegment(new Path(taskDirPath, "index_old_1.zip").toString(),
          new Interval(100, 150, DateTimeZone.UTC), "v0", NoneShardSpec.instance()),
      createSegment(new Path(taskDirPath, "index_old_2.zip").toString(),
          new Interval(200, 250, DateTimeZone.UTC), "v0", new LinearShardSpec(0)),
      createSegment(new Path(taskDirPath, "index_old_3.zip").toString(),
          new Interval(250, 300, DateTimeZone.UTC), "v0", new LinearShardSpec(0)));
  HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
  pusherConfig.setStorageDirectory(taskDirPath.toString());
  DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
  DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME,
      existingSegments, true, config, dataSegmentPusher);
  // Try appending to a non-extendable shard spec; commitInsertTable is expected to throw MetaException.
  DataSegment conflictingSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(),
      new Interval(100, 150, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
  Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(conflictingSegment,
      new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
  DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, conflictingSegment, descriptorPath);
  druidStorageHandler.commitInsertTable(tableMock, false);
}
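The distinction this test exercises is between shard specs that allow more partitions to be appended to an interval and those that do not. A simplified sketch of that check (the helper is hypothetical and not part of Druid or Hive; the actual validation lives inside the Hive storage handler utilities):

import org.apache.druid.timeline.partition.LinearShardSpec;
import org.apache.druid.timeline.partition.ShardSpec;

public final class ExtendabilitySketch {
  private ExtendabilitySketch() {}

  // Hypothetical helper: treat LinearShardSpec segments as appendable and everything
  // else (for example NoneShardSpec, which claims its whole interval) as non-extendable.
  public static boolean canAppendTo(ShardSpec existing) {
    return existing instanceof LinearShardSpec;
  }
}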
use of org.apache.druid.timeline.partition.LinearShardSpec in project hive by apache.
the class DruidRecordWriter method write.
@Override
public void write(Writable w) throws IOException {
  DruidWritable record = (DruidWritable) w;
  final long timestamp = (long) record.getValue().get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN);
  final int partitionNumber = Math.toIntExact(
      (long) record.getValue().getOrDefault(Constants.DRUID_SHARD_KEY_COL_NAME, -1L));
  final InputRow inputRow = new MapBasedInputRow(timestamp,
      dataSchema.getParser().getParseSpec().getDimensionsSpec().getDimensionNames(), record.getValue());
  try {
    if (partitionNumber != -1 && maxPartitionSize == -1) {
      /*
       * Case: the data is sorted by time and carries an extra hashing dimension (see DRUID_SHARD_KEY_COL_NAME).
       * DRUID_SHARD_KEY_COL_NAME is used as the segment partition key in addition to the time dimension,
       * so rows with the same DRUID_SHARD_KEY_COL_NAME and time interval end up in the same segment.
       */
      DateTime truncatedDateTime = segmentGranularity.bucketStart(DateTimes.utc(timestamp));
      final Interval interval = new Interval(truncatedDateTime, segmentGranularity.increment(truncatedDateTime));
      if (currentOpenSegment != null) {
        if (currentOpenSegment.getShardSpec().getPartitionNum() != partitionNumber
            || !currentOpenSegment.getInterval().equals(interval)) {
          pushSegments(ImmutableList.of(currentOpenSegment));
          currentOpenSegment = new SegmentIdWithShardSpec(dataSchema.getDataSource(), interval,
              tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(partitionNumber));
        }
      } else {
        currentOpenSegment = new SegmentIdWithShardSpec(dataSchema.getDataSource(), interval,
            tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(partitionNumber));
      }
      appenderator.add(currentOpenSegment, inputRow, committerSupplier::get);
    } else if (partitionNumber == -1 && maxPartitionSize != -1) {
      // Case: segments are partitioned by time and by a maximum row count per segment (maxPartitionSize).
      appenderator.add(getSegmentIdentifierAndMaybePush(timestamp), inputRow, committerSupplier::get);
    } else {
      throw new IllegalArgumentException(String.format(
          "partitionNumber and maxPartitionSize should be mutually exclusive "
              + "got partitionNum [%s] and maxPartitionSize [%s]", partitionNumber, maxPartitionSize));
    }
  } catch (SegmentNotWritableException e) {
    throw new IOException(e);
  }
}
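Both branches above key the segment identity on the time bucket of the row. A minimal sketch of how that bucket interval is derived from a timestamp and a segment granularity, assuming the same Druid utility classes used in the writer are on the classpath (the timestamp value is illustrative):

import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.joda.time.DateTime;
import org.joda.time.Interval;

public class BucketSketch {
  public static void main(String[] args) {
    Granularity segmentGranularity = Granularities.HOUR;
    long timestamp = DateTimes.of("2020-01-01T10:17:00Z").getMillis();
    // Truncate the event time to the start of its granularity bucket, then extend
    // one granularity unit forward to obtain the segment interval.
    DateTime bucketStart = segmentGranularity.bucketStart(DateTimes.utc(timestamp));
    Interval interval = new Interval(bucketStart, segmentGranularity.increment(bucketStart));
    System.out.println(interval); // 2020-01-01T10:00:00.000Z/2020-01-01T11:00:00.000Z
  }
}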
use of org.apache.druid.timeline.partition.LinearShardSpec in project hive by apache.
the class DruidRecordWriter method getSegmentIdentifierAndMaybePush.
/**
 * Computes the segment identifier for the given timestamp and may push the current open segment.
 * The push occurs when the maximum segment size is reached or when the event belongs to the next interval.
 * Note that this function assumes timestamps are pseudo-sorted: it closes the current segment and moves
 * on to the next granularity bucket as soon as an event from the next interval appears. The sorting is
 * done by the previous stage.
 *
 * @return the segment identifier for the truncated time; the current open segment may be pushed as a side effect.
 */
private SegmentIdWithShardSpec getSegmentIdentifierAndMaybePush(long truncatedTime) {
  DateTime truncatedDateTime = segmentGranularity.bucketStart(DateTimes.utc(truncatedTime));
  final Interval interval = new Interval(truncatedDateTime, segmentGranularity.increment(truncatedDateTime));
  SegmentIdWithShardSpec retVal;
  if (currentOpenSegment == null) {
    // First event: open partition 0 of this interval.
    currentOpenSegment = new SegmentIdWithShardSpec(dataSchema.getDataSource(), interval,
        tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(0));
    return currentOpenSegment;
  } else if (currentOpenSegment.getInterval().equals(interval)) {
    retVal = currentOpenSegment;
    int rowCount = appenderator.getRowCount(retVal);
    if (rowCount < maxPartitionSize) {
      return retVal;
    } else {
      // Same interval but the open segment is full: push it and open the next linear partition.
      retVal = new SegmentIdWithShardSpec(dataSchema.getDataSource(), interval,
          tuningConfig.getVersioningPolicy().getVersion(interval),
          new LinearShardSpec(currentOpenSegment.getShardSpec().getPartitionNum() + 1));
      pushSegments(Lists.newArrayList(currentOpenSegment));
      LOG.info("Creating new partition for segment {}, partition num {}", retVal.toString(),
          retVal.getShardSpec().getPartitionNum());
      currentOpenSegment = retVal;
      return retVal;
    }
  } else {
    // New interval: push the open segment and start again at partition 0.
    retVal = new SegmentIdWithShardSpec(dataSchema.getDataSource(), interval,
        tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(0));
    pushSegments(Lists.newArrayList(currentOpenSegment));
    LOG.info("Creating segment {}", retVal.toString());
    currentOpenSegment = retVal;
    return retVal;
  }
}
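For illustration, assume segmentGranularity is HOUR, maxPartitionSize is 2, and the writer receives pseudo-sorted rows at 10:00, 10:01, 10:02 and 11:00. The first row opens (10:00/11:00, partition 0); the second stays in it because the open segment holds fewer than two rows; the third finds partition 0 full, so partition 0 is pushed and (10:00/11:00, partition 1) is opened; the fourth belongs to a new interval, so partition 1 is pushed and (11:00/12:00, partition 0) is opened.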