use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
the class HadoopIngestionSpec method updateSegmentListIfDatasourcePathSpecIsUsed.
public static HadoopIngestionSpec updateSegmentListIfDatasourcePathSpecIsUsed(HadoopIngestionSpec spec, ObjectMapper jsonMapper, UsedSegmentLister segmentLister) throws IOException {
String dataSource = "dataSource";
String type = "type";
String multi = "multi";
String children = "children";
String segments = "segments";
String ingestionSpec = "ingestionSpec";
Map<String, Object> pathSpec = spec.getIOConfig().getPathSpec();
Map<String, Object> datasourcePathSpec = null;
if (pathSpec.get(type).equals(dataSource)) {
datasourcePathSpec = pathSpec;
} else if (pathSpec.get(type).equals(multi)) {
List<Map<String, Object>> childPathSpecs = (List<Map<String, Object>>) pathSpec.get(children);
for (Map<String, Object> childPathSpec : childPathSpecs) {
if (childPathSpec.get(type).equals(dataSource)) {
datasourcePathSpec = childPathSpec;
break;
}
}
}
if (datasourcePathSpec != null) {
Map<String, Object> ingestionSpecMap = (Map<String, Object>) datasourcePathSpec.get(ingestionSpec);
DatasourceIngestionSpec ingestionSpecObj = jsonMapper.convertValue(ingestionSpecMap, DatasourceIngestionSpec.class);
List<DataSegment> segmentsList = segmentLister.getUsedSegmentsForIntervals(ingestionSpecObj.getDataSource(), ingestionSpecObj.getIntervals());
if (ingestionSpecObj.getSegments() != null) {
//ensure that user supplied segment list matches with the segmentsList obtained from db
//this safety check lets users do test-n-set kind of batch delta ingestion where the delta
//ingestion task would only run if current state of the system is same as when they submitted
//the task.
List<DataSegment> userSuppliedSegmentsList = ingestionSpecObj.getSegments();
if (segmentsList.size() == userSuppliedSegmentsList.size()) {
Set<DataSegment> segmentsSet = new HashSet<>(segmentsList);
for (DataSegment userSegment : userSuppliedSegmentsList) {
if (!segmentsSet.contains(userSegment)) {
throw new IOException("user supplied segments list did not match with segments list obtained from db");
}
}
} else {
throw new IOException("user supplied segments list did not match with segments list obtained from db");
}
}
VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
for (DataSegment segment : segmentsList) {
timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
}
final List<WindowedDataSegment> windowedSegments = Lists.newArrayList();
for (Interval interval : ingestionSpecObj.getIntervals()) {
final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
for (TimelineObjectHolder<String, DataSegment> holder : timeLineSegments) {
for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
windowedSegments.add(new WindowedDataSegment(chunk.getObject(), holder.getInterval()));
}
}
datasourcePathSpec.put(segments, windowedSegments);
}
}
return spec;
}
use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
the class IngestSegmentFirehoseFactory method connect.
@Override
public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException {
log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
if (taskToolbox == null) {
// Noop Task is just used to create the toolbox and list segments.
taskToolbox = injector.getInstance(TaskToolboxFactory.class).build(new NoopTask("reingest", 0, 0, null, null, null));
}
try {
final List<DataSegment> usedSegments = taskToolbox.getTaskActionClient().submit(new SegmentListUsedAction(dataSource, interval, null));
final Map<DataSegment, File> segmentFileMap = taskToolbox.fetchSegments(usedSegments);
VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.<String>natural().nullsFirst());
for (DataSegment segment : usedSegments) {
timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
}
final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
final List<String> dims;
if (dimensions != null) {
dims = dimensions;
} else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
} else {
Set<String> dimSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {
@Override
public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> timelineObjectHolder) {
return Iterables.concat(Iterables.transform(timelineObjectHolder.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {
@Override
public Iterable<String> apply(PartitionChunk<DataSegment> input) {
return input.getObject().getDimensions();
}
}));
}
})));
dims = Lists.newArrayList(Sets.difference(dimSet, inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions()));
}
final List<String> metricsList;
if (metrics != null) {
metricsList = metrics;
} else {
Set<String> metricsSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {
@Override
public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> input) {
return Iterables.concat(Iterables.transform(input.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {
@Override
public Iterable<String> apply(PartitionChunk<DataSegment> input) {
return input.getObject().getMetrics();
}
}));
}
})));
metricsList = Lists.newArrayList(metricsSet);
}
final List<WindowedStorageAdapter> adapters = Lists.newArrayList(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>() {
@Override
public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder) {
return Iterables.transform(holder.getObject(), new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>() {
@Override
public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input) {
final DataSegment segment = input.getObject();
try {
return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getIdentifier()))), holder.getInterval());
} catch (IOException e) {
throw Throwables.propagate(e);
}
}
});
}
})));
return new IngestSegmentFirehose(adapters, dims, metricsList, dimFilter, Granularities.NONE);
} catch (IOException e) {
throw Throwables.propagate(e);
} catch (SegmentLoadingException e) {
throw Throwables.propagate(e);
}
}
use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
the class SQLMetadataSegmentManager method enableDatasource.
@Override
public boolean enableDatasource(final String ds) {
try {
final IDBI dbi = connector.getDBI();
VersionedIntervalTimeline<String, DataSegment> segmentTimeline = connector.inReadOnlyTransaction(new TransactionCallback<VersionedIntervalTimeline<String, DataSegment>>() {
@Override
public VersionedIntervalTimeline<String, DataSegment> inTransaction(Handle handle, TransactionStatus status) throws Exception {
return handle.createQuery(String.format("SELECT payload FROM %s WHERE dataSource = :dataSource", getSegmentsTable())).setFetchSize(connector.getStreamingFetchSize()).bind("dataSource", ds).map(ByteArrayMapper.FIRST).fold(new VersionedIntervalTimeline<String, DataSegment>(Ordering.natural()), new Folder3<VersionedIntervalTimeline<String, DataSegment>, byte[]>() {
@Override
public VersionedIntervalTimeline<String, DataSegment> fold(VersionedIntervalTimeline<String, DataSegment> timeline, byte[] payload, FoldController foldController, StatementContext statementContext) throws SQLException {
try {
final DataSegment segment = DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue(payload, DataSegment.class));
timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
return timeline;
} catch (Exception e) {
throw new SQLException(e.toString());
}
}
});
}
});
final List<DataSegment> segments = Lists.newArrayList();
for (TimelineObjectHolder<String, DataSegment> objectHolder : segmentTimeline.lookup(new Interval("0000-01-01/3000-01-01"))) {
for (PartitionChunk<DataSegment> partitionChunk : objectHolder.getObject()) {
segments.add(partitionChunk.getObject());
}
}
if (segments.isEmpty()) {
log.warn("No segments found in the database!");
return false;
}
dbi.withHandle(new HandleCallback<Void>() {
@Override
public Void withHandle(Handle handle) throws Exception {
Batch batch = handle.createBatch();
for (DataSegment segment : segments) {
batch.add(String.format("UPDATE %s SET used=true WHERE id = '%s'", getSegmentsTable(), segment.getIdentifier()));
}
batch.execute();
return null;
}
});
} catch (Exception e) {
log.error(e, "Exception enabling datasource %s", ds);
return false;
}
return true;
}
use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
the class DruidCoordinatorRuleRunner method run.
@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
replicatorThrottler.updateParams(coordinator.getDynamicConfigs().getReplicationThrottleLimit(), coordinator.getDynamicConfigs().getReplicantLifetime());
CoordinatorStats stats = new CoordinatorStats();
DruidCluster cluster = params.getDruidCluster();
if (cluster.isEmpty()) {
log.warn("Uh... I have no servers. Not assigning anything...");
return params;
}
// find available segments which are not overshadowed by other segments in DB
// only those would need to be loaded/dropped
// anything overshadowed by served segments is dropped automatically by DruidCoordinatorCleanupOvershadowed
Map<String, VersionedIntervalTimeline<String, DataSegment>> timelines = new HashMap<>();
for (DataSegment segment : params.getAvailableSegments()) {
VersionedIntervalTimeline<String, DataSegment> timeline = timelines.get(segment.getDataSource());
if (timeline == null) {
timeline = new VersionedIntervalTimeline<>(Comparators.comparable());
timelines.put(segment.getDataSource(), timeline);
}
timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
}
Set<DataSegment> overshadowed = new HashSet<>();
for (VersionedIntervalTimeline<String, DataSegment> timeline : timelines.values()) {
for (TimelineObjectHolder<String, DataSegment> holder : timeline.findOvershadowed()) {
for (DataSegment dataSegment : holder.getObject().payloads()) {
overshadowed.add(dataSegment);
}
}
}
Set<DataSegment> nonOvershadowed = new HashSet<>();
for (DataSegment dataSegment : params.getAvailableSegments()) {
if (!overshadowed.contains(dataSegment)) {
nonOvershadowed.add(dataSegment);
}
}
for (String tier : cluster.getTierNames()) {
replicatorThrottler.updateReplicationState(tier);
}
DruidCoordinatorRuntimeParams paramsWithReplicationManager = params.buildFromExistingWithoutAvailableSegments().withReplicationManager(replicatorThrottler).withAvailableSegments(nonOvershadowed).build();
// Run through all matched rules for available segments
DateTime now = new DateTime();
MetadataRuleManager databaseRuleManager = paramsWithReplicationManager.getDatabaseRuleManager();
final List<String> segmentsWithMissingRules = Lists.newArrayListWithCapacity(MAX_MISSING_RULES);
int missingRules = 0;
for (DataSegment segment : paramsWithReplicationManager.getAvailableSegments()) {
List<Rule> rules = databaseRuleManager.getRulesWithDefault(segment.getDataSource());
boolean foundMatchingRule = false;
for (Rule rule : rules) {
if (rule.appliesTo(segment, now)) {
stats.accumulate(rule.run(coordinator, paramsWithReplicationManager, segment));
foundMatchingRule = true;
break;
}
}
if (!foundMatchingRule) {
if (segmentsWithMissingRules.size() < MAX_MISSING_RULES) {
segmentsWithMissingRules.add(segment.getIdentifier());
}
missingRules++;
}
}
if (!segmentsWithMissingRules.isEmpty()) {
log.makeAlert("Unable to find matching rules!").addData("segmentsWithMissingRulesCount", missingRules).addData("segmentsWithMissingRules", segmentsWithMissingRules).emit();
}
return paramsWithReplicationManager.buildFromExistingWithoutAvailableSegments().withCoordinatorStats(stats).withAvailableSegments(params.getAvailableSegments()).build();
}
use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.
the class SchemalessIndexTest method makeAppendedMMappedIndex.
private static QueryableIndex makeAppendedMMappedIndex(Iterable<Pair<String, AggregatorFactory[]>> files, final List<Interval> intervals) {
try {
File tmpFile = File.createTempFile("yay", "boo");
tmpFile.delete();
File mergedFile = new File(tmpFile, "merged");
mergedFile.mkdirs();
mergedFile.deleteOnExit();
List<File> filesToMap = makeFilesToMap(tmpFile, files);
VersionedIntervalTimeline<Integer, File> timeline = new VersionedIntervalTimeline<Integer, File>(Ordering.natural().nullsFirst());
ShardSpec noneShardSpec = NoneShardSpec.instance();
for (int i = 0; i < intervals.size(); i++) {
timeline.add(intervals.get(i), i, noneShardSpec.createChunk(filesToMap.get(i)));
}
final List<IndexableAdapter> adapters = Lists.newArrayList(Iterables.concat(// TimelineObjectHolder is actually an iterable of iterable of indexable adapters
Iterables.transform(timeline.lookup(new Interval("1000-01-01/3000-01-01")), new Function<TimelineObjectHolder<Integer, File>, Iterable<IndexableAdapter>>() {
@Override
public Iterable<IndexableAdapter> apply(final TimelineObjectHolder<Integer, File> timelineObjectHolder) {
return Iterables.transform(timelineObjectHolder.getObject(), // Each chunk can be used to build the actual IndexableAdapter
new Function<PartitionChunk<File>, IndexableAdapter>() {
@Override
public IndexableAdapter apply(PartitionChunk<File> chunk) {
try {
return new RowboatFilteringIndexAdapter(new QueryableIndexIndexableAdapter(INDEX_IO.loadIndex(chunk.getObject())), new Predicate<Rowboat>() {
@Override
public boolean apply(Rowboat input) {
return timelineObjectHolder.getInterval().contains(input.getTimestamp());
}
});
} catch (IOException e) {
throw Throwables.propagate(e);
}
}
});
}
})));
return INDEX_IO.loadIndex(INDEX_MERGER.append(adapters, null, mergedFile, indexSpec));
} catch (IOException e) {
throw Throwables.propagate(e);
}
}
Aggregations