use of io.druid.timeline.partition.LinearShardSpec in project druid by druid-io.
the class CalciteTests method createMockWalker.
public static SpecificSegmentsQuerySegmentWalker createMockWalker(final File tmpDir) {
final QueryableIndex index1 = IndexBuilder.create().tmpDir(new File(tmpDir, "1")).indexMerger(TestHelper.getTestIndexMergerV9()).schema(INDEX_SCHEMA).rows(ROWS1).buildMMappedIndex();
final QueryableIndex index2 = IndexBuilder.create().tmpDir(new File(tmpDir, "2")).indexMerger(TestHelper.getTestIndexMergerV9()).schema(INDEX_SCHEMA).rows(ROWS2).buildMMappedIndex();
return new SpecificSegmentsQuerySegmentWalker(queryRunnerFactoryConglomerate()).add(DataSegment.builder().dataSource(DATASOURCE1).interval(index1.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).build(), index1).add(DataSegment.builder().dataSource(DATASOURCE2).interval(index2.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).build(), index2);
}
use of io.druid.timeline.partition.LinearShardSpec in project druid by druid-io.
the class IndexerSQLMetadataStorageCoordinator method allocatePendingSegment.
@Override
public SegmentIdentifier allocatePendingSegment(final String dataSource, final String sequenceName, final String previousSegmentId, final Interval interval, final String maxVersion) throws IOException {
Preconditions.checkNotNull(dataSource, "dataSource");
Preconditions.checkNotNull(sequenceName, "sequenceName");
Preconditions.checkNotNull(interval, "interval");
Preconditions.checkNotNull(maxVersion, "maxVersion");
final String previousSegmentIdNotNull = previousSegmentId == null ? "" : previousSegmentId;
return connector.retryTransaction(new TransactionCallback<SegmentIdentifier>() {
@Override
public SegmentIdentifier inTransaction(Handle handle, TransactionStatus transactionStatus) throws Exception {
final List<byte[]> existingBytes = handle.createQuery(String.format("SELECT payload FROM %s WHERE " + "dataSource = :dataSource AND " + "sequence_name = :sequence_name AND " + "sequence_prev_id = :sequence_prev_id", dbTables.getPendingSegmentsTable())).bind("dataSource", dataSource).bind("sequence_name", sequenceName).bind("sequence_prev_id", previousSegmentIdNotNull).map(ByteArrayMapper.FIRST).list();
if (!existingBytes.isEmpty()) {
final SegmentIdentifier existingIdentifier = jsonMapper.readValue(Iterables.getOnlyElement(existingBytes), SegmentIdentifier.class);
if (existingIdentifier.getInterval().getStartMillis() == interval.getStartMillis() && existingIdentifier.getInterval().getEndMillis() == interval.getEndMillis()) {
log.info("Found existing pending segment [%s] for sequence[%s] (previous = [%s]) in DB", existingIdentifier.getIdentifierAsString(), sequenceName, previousSegmentIdNotNull);
return existingIdentifier;
} else {
log.warn("Cannot use existing pending segment [%s] for sequence[%s] (previous = [%s]) in DB, " + "does not match requested interval[%s]", existingIdentifier.getIdentifierAsString(), sequenceName, previousSegmentIdNotNull, interval);
return null;
}
}
// Make up a pending segment based on existing segments and pending segments in the DB. This works
// assuming that all tasks inserting segments at a particular point in time are going through the
// allocatePendingSegment flow. This should be assured through some other mechanism (like task locks).
final SegmentIdentifier newIdentifier;
final List<TimelineObjectHolder<String, DataSegment>> existingChunks = getTimelineForIntervalsWithHandle(handle, dataSource, ImmutableList.of(interval)).lookup(interval);
if (existingChunks.size() > 1) {
// Not possible to expand more than one chunk with a single segment.
log.warn("Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: already have [%,d] chunks.", dataSource, interval, maxVersion, existingChunks.size());
return null;
} else {
SegmentIdentifier max = null;
if (!existingChunks.isEmpty()) {
TimelineObjectHolder<String, DataSegment> existingHolder = Iterables.getOnlyElement(existingChunks);
for (PartitionChunk<DataSegment> existing : existingHolder.getObject()) {
if (max == null || max.getShardSpec().getPartitionNum() < existing.getObject().getShardSpec().getPartitionNum()) {
max = SegmentIdentifier.fromDataSegment(existing.getObject());
}
}
}
final List<SegmentIdentifier> pendings = getPendingSegmentsForIntervalWithHandle(handle, dataSource, interval);
for (SegmentIdentifier pending : pendings) {
if (max == null || pending.getVersion().compareTo(max.getVersion()) > 0 || (pending.getVersion().equals(max.getVersion()) && pending.getShardSpec().getPartitionNum() > max.getShardSpec().getPartitionNum())) {
max = pending;
}
}
if (max == null) {
newIdentifier = new SegmentIdentifier(dataSource, interval, maxVersion, new NumberedShardSpec(0, 0));
} else if (!max.getInterval().equals(interval) || max.getVersion().compareTo(maxVersion) > 0) {
log.warn("Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: conflicting segment[%s].", dataSource, interval, maxVersion, max.getIdentifierAsString());
return null;
} else if (max.getShardSpec() instanceof LinearShardSpec) {
newIdentifier = new SegmentIdentifier(dataSource, max.getInterval(), max.getVersion(), new LinearShardSpec(max.getShardSpec().getPartitionNum() + 1));
} else if (max.getShardSpec() instanceof NumberedShardSpec) {
newIdentifier = new SegmentIdentifier(dataSource, max.getInterval(), max.getVersion(), new NumberedShardSpec(max.getShardSpec().getPartitionNum() + 1, ((NumberedShardSpec) max.getShardSpec()).getPartitions()));
} else {
log.warn("Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: ShardSpec class[%s] used by [%s].", dataSource, interval, maxVersion, max.getShardSpec().getClass(), max.getIdentifierAsString());
return null;
}
}
// SELECT -> INSERT can fail due to races; callers must be prepared to retry.
// Avoiding ON DUPLICATE KEY since it's not portable.
// Avoiding try/catch since it may cause inadvertent transaction-splitting.
// UNIQUE key for the row, ensuring sequences do not fork in two directions.
// Using a single column instead of (sequence_name, sequence_prev_id) as some MySQL storage engines
// have difficulty with large unique keys (see https://github.com/druid-io/druid/issues/2319)
final String sequenceNamePrevIdSha1 = BaseEncoding.base16().encode(Hashing.sha1().newHasher().putBytes(StringUtils.toUtf8(sequenceName)).putByte((byte) 0xff).putBytes(StringUtils.toUtf8(previousSegmentIdNotNull)).hash().asBytes());
handle.createStatement(String.format("INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, sequence_name, sequence_prev_id, sequence_name_prev_id_sha1, payload) " + "VALUES (:id, :dataSource, :created_date, :start, :end, :sequence_name, :sequence_prev_id, :sequence_name_prev_id_sha1, :payload)", dbTables.getPendingSegmentsTable(), connector.getQuoteString())).bind("id", newIdentifier.getIdentifierAsString()).bind("dataSource", dataSource).bind("created_date", new DateTime().toString()).bind("start", interval.getStart().toString()).bind("end", interval.getEnd().toString()).bind("sequence_name", sequenceName).bind("sequence_prev_id", previousSegmentIdNotNull).bind("sequence_name_prev_id_sha1", sequenceNamePrevIdSha1).bind("payload", jsonMapper.writeValueAsBytes(newIdentifier)).execute();
log.info("Allocated pending segment [%s] for sequence[%s] (previous = [%s]) in DB", newIdentifier.getIdentifierAsString(), sequenceName, previousSegmentIdNotNull);
return newIdentifier;
}
}, ALLOCATE_SEGMENT_QUIET_TRIES, SQLMetadataConnector.DEFAULT_MAX_TRIES);
}
use of io.druid.timeline.partition.LinearShardSpec in project hive by apache.
the class DruidRecordWriter method getSegmentIdentifierAndMaybePush.
/**
* This function computes the segment identifier and push the current open segment
* The push will occur if max size is reached or the event belongs to the next interval.
* Note that this function assumes that timestamps are pseudo sorted.
* This function will close and move to the next segment granularity as soon as
* an event from the next interval appears. The sorting is done by the previous stage.
*
* @return segmentIdentifier with of the truncatedTime and maybe push the current open segment.
*/
private SegmentIdentifier getSegmentIdentifierAndMaybePush(long truncatedTime) {
final Interval interval = new Interval(new DateTime(truncatedTime), segmentGranularity.increment(new DateTime(truncatedTime)));
SegmentIdentifier retVal;
if (currentOpenSegment == null) {
currentOpenSegment = new SegmentIdentifier(dataSchema.getDataSource(), interval, tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(0));
return currentOpenSegment;
} else if (currentOpenSegment.getInterval().equals(interval)) {
retVal = currentOpenSegment;
int rowCount = appenderator.getRowCount(retVal);
if (rowCount < maxPartitionSize) {
return retVal;
} else {
retVal = new SegmentIdentifier(dataSchema.getDataSource(), interval, tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(currentOpenSegment.getShardSpec().getPartitionNum() + 1));
pushSegments(Lists.newArrayList(currentOpenSegment));
LOG.info("Creating new partition for segment {}, partition num {}", retVal.getIdentifierAsString(), retVal.getShardSpec().getPartitionNum());
currentOpenSegment = retVal;
return retVal;
}
} else {
retVal = new SegmentIdentifier(dataSchema.getDataSource(), interval, tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(0));
pushSegments(Lists.newArrayList(currentOpenSegment));
LOG.info("Creating segment {}", retVal.getIdentifierAsString());
currentOpenSegment = retVal;
return retVal;
}
}
use of io.druid.timeline.partition.LinearShardSpec in project hive by apache.
the class DruidRecordWriter method write.
@Override
public void write(Writable w) throws IOException {
DruidWritable record = (DruidWritable) w;
final long timestamp = (long) record.getValue().get(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN);
final long truncatedTime = (long) record.getValue().get(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME);
final int partitionNumber = Math.toIntExact((long) record.getValue().getOrDefault(Constants.DRUID_SHARD_KEY_COL_NAME, -1l));
final InputRow inputRow = new MapBasedInputRow(timestamp, dataSchema.getParser().getParseSpec().getDimensionsSpec().getDimensionNames(), record.getValue());
try {
if (partitionNumber != -1 && maxPartitionSize == -1) {
final Interval interval = new Interval(new DateTime(truncatedTime), segmentGranularity.increment(new DateTime(truncatedTime)));
if (currentOpenSegment != null) {
if (currentOpenSegment.getShardSpec().getPartitionNum() != partitionNumber || !currentOpenSegment.getInterval().equals(interval)) {
pushSegments(ImmutableList.of(currentOpenSegment));
currentOpenSegment = new SegmentIdentifier(dataSchema.getDataSource(), interval, tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(partitionNumber));
}
} else if (currentOpenSegment == null) {
currentOpenSegment = new SegmentIdentifier(dataSchema.getDataSource(), interval, tuningConfig.getVersioningPolicy().getVersion(interval), new LinearShardSpec(partitionNumber));
}
appenderator.add(currentOpenSegment, inputRow, committerSupplier);
} else if (partitionNumber == -1 && maxPartitionSize != -1) {
appenderator.add(getSegmentIdentifierAndMaybePush(truncatedTime), inputRow, committerSupplier);
} else {
throw new IllegalArgumentException(String.format("partitionNumber and maxPartitionSize should be mutually exclusive got partitionNum [%s] and maxPartitionSize [%s]", partitionNumber, maxPartitionSize));
}
} catch (SegmentNotWritableException e) {
throw new IOException(e);
}
}
use of io.druid.timeline.partition.LinearShardSpec in project hive by apache.
the class TestDruidStorageHandler method testCommitInsertIntoTable.
@Test
public void testCommitInsertIntoTable() throws MetaException, IOException {
DerbyConnectorTestUtility connector = derbyConnectorRule.getConnector();
MetadataStorageTablesConfig metadataStorageTablesConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
druidStorageHandler.preCreateTable(tableMock);
LocalFileSystem localFileSystem = FileSystem.getLocal(config);
Path taskDirPath = new Path(tableWorkingPath, druidStorageHandler.makeStagingName());
List<DataSegment> existingSegments = Arrays.asList(createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v0", new LinearShardSpec(1)));
HdfsDataSegmentPusherConfig pusherConfig = new HdfsDataSegmentPusherConfig();
pusherConfig.setStorageDirectory(config.get(String.valueOf(HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY)));
DataSegmentPusher dataSegmentPusher = new HdfsDataSegmentPusher(pusherConfig, config, DruidStorageHandlerUtils.JSON_MAPPER);
DruidStorageHandlerUtils.publishSegmentsAndCommit(connector, metadataStorageTablesConfig, DATA_SOURCE_NAME, existingSegments, true, config, dataSegmentPusher);
DataSegment dataSegment = createSegment(new Path(taskDirPath, DruidStorageHandlerUtils.INDEX_ZIP).toString(), new Interval(100, 150, DateTimeZone.UTC), "v1", new LinearShardSpec(0));
Path descriptorPath = DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(dataSegment, new Path(taskDirPath, DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME));
DruidStorageHandlerUtils.writeSegmentDescriptor(localFileSystem, dataSegment, descriptorPath);
druidStorageHandler.commitInsertTable(tableMock, false);
Assert.assertArrayEquals(Lists.newArrayList(DATA_SOURCE_NAME).toArray(), Lists.newArrayList(DruidStorageHandlerUtils.getAllDataSourceNames(connector, metadataStorageTablesConfig)).toArray());
final List<DataSegment> dataSegmentList = getUsedSegmentsList(connector, metadataStorageTablesConfig);
Assert.assertEquals(2, dataSegmentList.size());
DataSegment persistedSegment = dataSegmentList.get(1);
// Insert into appends to old version
Assert.assertEquals("v0", persistedSegment.getVersion());
Assert.assertTrue(persistedSegment.getShardSpec() instanceof LinearShardSpec);
Assert.assertEquals(2, persistedSegment.getShardSpec().getPartitionNum());
Path expectedFinalHadoopPath = new Path(dataSegmentPusher.getPathForHadoop(), dataSegmentPusher.makeIndexPathName(persistedSegment, DruidStorageHandlerUtils.INDEX_ZIP));
Assert.assertEquals(ImmutableMap.of("type", "hdfs", "path", expectedFinalHadoopPath.toString()), persistedSegment.getLoadSpec());
Assert.assertEquals("dummySegmentData", FileUtils.readFileToString(new File(expectedFinalHadoopPath.toUri())));
}
Aggregations