Use of org.apache.druid.indexing.overlord.Segments in project druid by druid-io.
Class IndexerSQLMetadataStorageCoordinatorTest, method testAnnounceHistoricalSegments.
@Test
public void testAnnounceHistoricalSegments() throws IOException {
  Set<DataSegment> segments = new HashSet<>();
  for (int i = 0; i < 105; i++) {
    segments.add(new DataSegment(
        "fooDataSource",
        Intervals.of("2015-01-01T00Z/2015-01-02T00Z"),
        "version",
        ImmutableMap.of(),
        ImmutableList.of("dim1"),
        ImmutableList.of("m1"),
        new LinearShardSpec(i),
        9,
        100
    ));
  }

  coordinator.announceHistoricalSegments(segments);

  for (DataSegment segment : segments) {
    Assert.assertArrayEquals(
        mapper.writeValueAsString(segment).getBytes(StandardCharsets.UTF_8),
        derbyConnector.lookup(
            derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable(),
            "id",
            "payload",
            segment.getId().toString()
        )
    );
  }

  List<String> segmentIds = segments.stream()
                                    .map(segment -> segment.getId().toString())
                                    .collect(Collectors.toList());
  segmentIds.sort(Comparator.naturalOrder());
  Assert.assertEquals(segmentIds, retrieveUsedSegmentIds());

  // Should not update dataSource metadata.
  Assert.assertEquals(0, metadataUpdateCounter.get());
}
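The assertion loop above checks that each stored payload is byte-for-byte the Jackson serialization of the corresponding segment. A minimal sketch of that round-trip check in isolation, using plain Jackson and JUnit; the assertPayloadMatches helper and its storedPayload argument are hypothetical stand-ins for the Derby connector lookup used in the real test:

import com.fasterxml.jackson.databind.ObjectMapper;
import java.nio.charset.StandardCharsets;
import org.junit.Assert;

public class PayloadRoundTripSketch {
  private static final ObjectMapper MAPPER = new ObjectMapper();

  // Compares the JSON serialization of a value against the bytes read back from storage.
  // 'storedPayload' stands in for derbyConnector.lookup(...) in the real test.
  static void assertPayloadMatches(Object value, byte[] storedPayload) throws Exception {
    byte[] expected = MAPPER.writeValueAsString(value).getBytes(StandardCharsets.UTF_8);
    Assert.assertArrayEquals(expected, storedPayload);
  }
}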
Use of org.apache.druid.indexing.overlord.Segments in project druid by druid-io.
Class CompactionTask, method createDimensionsSpec.
private static DimensionsSpec createDimensionsSpec(List<NonnullPair<QueryableIndex, DataSegment>> queryableIndices) {
  final BiMap<String, Integer> uniqueDims = HashBiMap.create();
  final Map<String, DimensionSchema> dimensionSchemaMap = new HashMap<>();

  // Try to retain the order in which dimensions were originally specified, since that order may be
  // optimized for performance.
  // Dimensions are extracted from the most recent segments to the oldest, because recent segments are
  // likely to be queried more frequently, so performance should be optimized for them rather than for
  // old segments.
  // Sort timelineSegments in order of interval; see https://github.com/apache/druid/pull/9905.
  queryableIndices.sort(
      (o1, o2) -> Comparators.intervalsByStartThenEnd().compare(o1.rhs.getInterval(), o2.rhs.getInterval())
  );

  int index = 0;
  for (NonnullPair<QueryableIndex, DataSegment> pair : Lists.reverse(queryableIndices)) {
    final QueryableIndex queryableIndex = pair.lhs;
    final Map<String, DimensionHandler> dimensionHandlerMap = queryableIndex.getDimensionHandlers();
    for (String dimension : queryableIndex.getAvailableDimensions()) {
      final ColumnHolder columnHolder = Preconditions.checkNotNull(
          queryableIndex.getColumnHolder(dimension),
          "Cannot find column for dimension[%s]",
          dimension
      );
      if (!uniqueDims.containsKey(dimension)) {
        final DimensionHandler dimensionHandler = Preconditions.checkNotNull(
            dimensionHandlerMap.get(dimension),
            "Cannot find dimensionHandler for dimension[%s]",
            dimension
        );
        uniqueDims.put(dimension, index++);
        dimensionSchemaMap.put(
            dimension,
            createDimensionSchema(dimension, columnHolder.getCapabilities(), dimensionHandler.getMultivalueHandling())
        );
      }
    }
  }

  final BiMap<Integer, String> orderedDims = uniqueDims.inverse();
  final List<DimensionSchema> dimensionSchemas = IntStream
      .range(0, orderedDims.size())
      .mapToObj(i -> {
        final String dimName = orderedDims.get(i);
        return Preconditions.checkNotNull(
            dimensionSchemaMap.get(dimName),
            "Cannot find dimension[%s] from dimensionSchemaMap",
            dimName
        );
      })
      .collect(Collectors.toList());

  return new DimensionsSpec(dimensionSchemas);
}
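The ordering trick above assigns each dimension the index at which it is first seen while walking segments newest-first, then inverts the BiMap to read dimensions back in index order. A minimal sketch of that technique in isolation, using plain string lists instead of QueryableIndex; the class and method names here are illustrative, not part of Druid:

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.List;

public class FirstSeenOrderSketch {
  // Given per-segment dimension lists ordered oldest-to-newest, remember the position at which each
  // dimension is first seen when scanning newest-first, then rebuild one list in that order.
  static List<String> mergeNewestFirst(List<List<String>> dimensionListsOldestFirst) {
    final BiMap<String, Integer> uniqueDims = HashBiMap.create();
    int index = 0;
    for (List<String> dims : Lists.reverse(dimensionListsOldestFirst)) {
      for (String dim : dims) {
        if (!uniqueDims.containsKey(dim)) {
          uniqueDims.put(dim, index++);
        }
      }
    }
    final BiMap<Integer, String> ordered = uniqueDims.inverse();
    final List<String> result = new ArrayList<>();
    for (int i = 0; i < ordered.size(); i++) {
      result.add(ordered.get(i));
    }
    return result;
  }
}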
Use of org.apache.druid.indexing.overlord.Segments in project druid by druid-io.
Class MaterializedViewSupervisor, method getVersionAndBaseSegments.
private Pair<Map<Interval, String>, Map<Interval, List<DataSegment>>> getVersionAndBaseSegments(
    Collection<DataSegment> snapshot
) {
  Map<Interval, String> versions = new HashMap<>();
  Map<Interval, List<DataSegment>> segments = new HashMap<>();
  for (DataSegment segment : snapshot) {
    Interval interval = segment.getInterval();
    versions.put(interval, segment.getVersion());
    segments.computeIfAbsent(interval, i -> new ArrayList<>()).add(segment);
  }
  return new Pair<>(versions, segments);
}
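The grouping above builds one list per interval in a single pass with Map.computeIfAbsent. The same shape can also be written with a stream collector; a small sketch with plain strings standing in for segments and an arbitrary key in place of Interval (names here are illustrative):

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class GroupByKeySketch {
  // Groups values by a derived key, producing one list per key, as getVersionAndBaseSegments
  // does per Interval with computeIfAbsent.
  static Map<Integer, List<String>> groupByLength(List<String> values) {
    return values.stream().collect(Collectors.groupingBy(String::length));
  }
}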
Use of org.apache.druid.indexing.overlord.Segments in project druid by druid-io.
Class MaterializedViewSupervisor, method getMaxCreateDateAndBaseSegments.
private Pair<Map<Interval, String>, Map<Interval, List<DataSegment>>> getMaxCreateDateAndBaseSegments(
    Collection<Pair<DataSegment, String>> snapshot
) {
  Interval maxAllowedToBuildInterval = snapshot
      .parallelStream()
      .map(pair -> pair.lhs)
      .map(DataSegment::getInterval)
      .max(Comparators.intervalsByStartThenEnd())
      .get();
  Map<Interval, String> maxCreatedDate = new HashMap<>();
  Map<Interval, List<DataSegment>> segments = new HashMap<>();
  for (Pair<DataSegment, String> entry : snapshot) {
    DataSegment segment = entry.lhs;
    String createDate = entry.rhs;
    Interval interval = segment.getInterval();
    if (!hasEnoughLag(interval, maxAllowedToBuildInterval)) {
      continue;
    }
    maxCreatedDate.merge(
        interval,
        createDate,
        (date1, date2) -> DateTimes.max(DateTimes.of(date1), DateTimes.of(date2)).toString()
    );
    segments.computeIfAbsent(interval, i -> new ArrayList<>()).add(segment);
  }
  return new Pair<>(maxCreatedDate, segments);
}
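The Map.merge call above keeps the latest create date seen for each interval. A minimal sketch of the same merge pattern using java.time instead of Druid's DateTimes helper; the class and method names are illustrative:

import java.time.Instant;
import java.util.Map;

public class MaxDateMergeSketch {
  // Records the most recent ISO-8601 timestamp seen per key, mirroring the maxCreatedDate.merge call above.
  static void recordLatest(Map<String, String> latestByKey, String key, String isoTimestamp) {
    latestByKey.merge(
        key,
        isoTimestamp,
        (a, b) -> Instant.parse(a).isAfter(Instant.parse(b)) ? a : b
    );
  }
}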
Use of org.apache.druid.indexing.overlord.Segments in project druid by druid-io.
Class CompactionTask, method runTask.
@Override
public TaskStatus runTask(TaskToolbox toolbox) throws Exception {
  final List<ParallelIndexIngestionSpec> ingestionSpecs = createIngestionSchema(
      toolbox,
      getTaskLockHelper().getLockGranularityToUse(),
      segmentProvider,
      partitionConfigurationManager,
      dimensionsSpec,
      transformSpec,
      metricsSpec,
      granularitySpec,
      toolbox.getCoordinatorClient(),
      segmentCacheManagerFactory,
      retryPolicyFactory,
      ioConfig.isDropExisting()
  );
  final List<ParallelIndexSupervisorTask> indexTaskSpecs = IntStream
      .range(0, ingestionSpecs.size())
      .mapToObj(i -> {
        // The ID of SubtaskSpecs is used as the base sequenceName in segment allocation protocol.
        // The indexing tasks generated by the compaction task should use different sequenceNames
        // so that they can allocate valid segment IDs with no duplication.
        ParallelIndexIngestionSpec ingestionSpec = ingestionSpecs.get(i);
        final String baseSequenceName = createIndexTaskSpecId(i);
        return newTask(baseSequenceName, ingestionSpec);
      })
      .collect(Collectors.toList());

  if (indexTaskSpecs.isEmpty()) {
    String msg = StringUtils.format("Can't find segments from inputSpec[%s], nothing to do.", ioConfig.getInputSpec());
    log.warn(msg);
    return TaskStatus.failure(getId(), msg);
  } else {
    registerResourceCloserOnAbnormalExit(currentSubTaskHolder);
    final int totalNumSpecs = indexTaskSpecs.size();
    log.info("Generated [%d] compaction task specs", totalNumSpecs);

    int failCnt = 0;
    for (ParallelIndexSupervisorTask eachSpec : indexTaskSpecs) {
      final String json = toolbox.getJsonMapper().writerWithDefaultPrettyPrinter().writeValueAsString(eachSpec);
      if (!currentSubTaskHolder.setTask(eachSpec)) {
        String errMsg = "Task was asked to stop. Finish as failed.";
        log.info(errMsg);
        return TaskStatus.failure(getId(), errMsg);
      }
      try {
        if (eachSpec.isReady(toolbox.getTaskActionClient())) {
          log.info("Running indexSpec: " + json);
          final TaskStatus eachResult = eachSpec.run(toolbox);
          if (!eachResult.isSuccess()) {
            failCnt++;
            log.warn("Failed to run indexSpec: [%s].\nTrying the next indexSpec.", json);
          }
        } else {
          failCnt++;
          log.warn("indexSpec is not ready: [%s].\nTrying the next indexSpec.", json);
        }
      } catch (Exception e) {
        failCnt++;
        log.warn(e, "Failed to run indexSpec: [%s].\nTrying the next indexSpec.", json);
      }
    }

    String msg = StringUtils.format(
        "Ran [%d] specs, [%d] succeeded, [%d] failed",
        totalNumSpecs,
        totalNumSpecs - failCnt,
        failCnt
    );
    log.info(msg);
    return failCnt == 0 ? TaskStatus.success(getId()) : TaskStatus.failure(getId(), msg);
  }
}
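runTask reports success only when every generated sub-task succeeds; each failure is counted and the loop moves on to the next spec rather than aborting. A stripped-down sketch of that run-all-and-count-failures control flow, with a Callable<Boolean> standing in for ParallelIndexSupervisorTask.run; the names are illustrative, not Druid APIs:

import java.util.List;
import java.util.concurrent.Callable;

public class RunAllCountFailuresSketch {
  // Runs each task in order, counting failures instead of aborting, and reports overall success
  // only if every task succeeded -- the same shape as the compaction sub-task loop above.
  static boolean runAll(List<Callable<Boolean>> tasks) {
    int failCnt = 0;
    for (Callable<Boolean> task : tasks) {
      try {
        if (!Boolean.TRUE.equals(task.call())) {
          failCnt++;
        }
      } catch (Exception e) {
        failCnt++;
      }
    }
    return failCnt == 0;
  }
}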