Use of io.druid.indexing.common.actions.SegmentListUsedAction in project druid by druid-io.
In class IngestSegmentFirehoseFactory, method connect:
@Override
public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException
{
  log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
  if (taskToolbox == null) {
    // Noop Task is just used to create the toolbox and list segments.
    taskToolbox = injector.getInstance(TaskToolboxFactory.class)
                          .build(new NoopTask("reingest", 0, 0, null, null, null));
  }
  try {
    final List<DataSegment> usedSegments = taskToolbox.getTaskActionClient()
                                                      .submit(new SegmentListUsedAction(dataSource, interval, null));
    final Map<DataSegment, File> segmentFileMap = taskToolbox.fetchSegments(usedSegments);
    final VersionedIntervalTimeline<String, DataSegment> timeline =
        new VersionedIntervalTimeline<>(Ordering.<String>natural().nullsFirst());
    for (DataSegment segment : usedSegments) {
      timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }
    final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);

    // Dimensions: an explicit list wins, then the parse spec's custom dimensions, then the
    // union of dimensions across all used segments minus the configured exclusions.
    final List<String> dims;
    if (dimensions != null) {
      dims = dimensions;
    } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
      dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
    } else {
      final Set<String> dimSet = Sets.newHashSet(Iterables.concat(Iterables.transform(
          timeLineSegments,
          new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>()
          {
            @Override
            public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> timelineObjectHolder)
            {
              return Iterables.concat(Iterables.transform(
                  timelineObjectHolder.getObject(),
                  new Function<PartitionChunk<DataSegment>, Iterable<String>>()
                  {
                    @Override
                    public Iterable<String> apply(PartitionChunk<DataSegment> input)
                    {
                      return input.getObject().getDimensions();
                    }
                  }
              ));
            }
          }
      )));
      dims = Lists.newArrayList(
          Sets.difference(dimSet, inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions())
      );
    }

    // Metrics: an explicit list wins, otherwise the union of metrics across all used segments.
    final List<String> metricsList;
    if (metrics != null) {
      metricsList = metrics;
    } else {
      final Set<String> metricsSet = Sets.newHashSet(Iterables.concat(Iterables.transform(
          timeLineSegments,
          new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>()
          {
            @Override
            public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> input)
            {
              return Iterables.concat(Iterables.transform(
                  input.getObject(),
                  new Function<PartitionChunk<DataSegment>, Iterable<String>>()
                  {
                    @Override
                    public Iterable<String> apply(PartitionChunk<DataSegment> input)
                    {
                      return input.getObject().getMetrics();
                    }
                  }
              ));
            }
          }
      )));
      metricsList = Lists.newArrayList(metricsSet);
    }

    // Load each visible chunk and window its storage adapter to the holder's interval.
    final List<WindowedStorageAdapter> adapters = Lists.newArrayList(Iterables.concat(Iterables.transform(
        timeLineSegments,
        new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>()
        {
          @Override
          public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder)
          {
            return Iterables.transform(
                holder.getObject(),
                new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>()
                {
                  @Override
                  public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input)
                  {
                    final DataSegment segment = input.getObject();
                    try {
                      return new WindowedStorageAdapter(
                          new QueryableIndexStorageAdapter(
                              indexIO.loadIndex(
                                  Preconditions.checkNotNull(
                                      segmentFileMap.get(segment),
                                      "File for segment %s",
                                      segment.getIdentifier()
                                  )
                              )
                          ),
                          holder.getInterval()
                      );
                    }
                    catch (IOException e) {
                      throw Throwables.propagate(e);
                    }
                  }
                }
            );
          }
        }
    )));
    return new IngestSegmentFirehose(adapters, dims, metricsList, dimFilter, Granularities.NONE);
  }
  catch (IOException | SegmentLoadingException e) {
    throw Throwables.propagate(e);
  }
}
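Stripped of the firehose plumbing, the lookup above has two steps: submit a SegmentListUsedAction through the toolbox's TaskActionClient, then feed the results into a VersionedIntervalTimeline so that overshadowed (older-version) segments are dropped. A minimal sketch of just that core, assuming the usual Druid and Guava imports; the helper name listVisibleSegments is illustrative, not part of the Druid API:

// Illustrative helper, not Druid API: list the used segments for an interval and keep
// only the chunks that remain visible after version overshadowing is applied.
static List<TimelineObjectHolder<String, DataSegment>> listVisibleSegments(
    TaskToolbox toolbox,
    String dataSource,
    Interval interval
) throws IOException
{
  final List<DataSegment> used = toolbox.getTaskActionClient()
                                        .submit(new SegmentListUsedAction(dataSource, interval, null));
  final VersionedIntervalTimeline<String, DataSegment> timeline =
      new VersionedIntervalTimeline<>(Ordering.<String>natural().nullsFirst());
  for (DataSegment segment : used) {
    timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
  }
  // lookup() returns only holders whose chunks are not fully overshadowed by newer versions.
  return timeline.lookup(interval);
}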
Use of io.druid.indexing.common.actions.SegmentListUsedAction in project druid by druid-io.
In class SameIntervalMergeTaskTest, method runTask:
private List<DataSegment> runTask(final SameIntervalMergeTask mergeTask, final String version) throws Exception
{
  boolean isReady = mergeTask.isReady(new TaskActionClient()
  {
    @Override
    public <RetType> RetType submit(TaskAction<RetType> taskAction) throws IOException
    {
      if (taskAction instanceof LockTryAcquireAction) {
        // the lock of this interval is required
        Assert.assertEquals(mergeTask.getInterval(), ((LockTryAcquireAction) taskAction).getInterval());
        isRedayCountDown.countDown();
        taskLock = new TaskLock(mergeTask.getGroupId(), mergeTask.getDataSource(), mergeTask.getInterval(), version);
        return (RetType) taskLock;
      }
      return null;
    }
  });
  // ensure LockTryAcquireAction is submitted
  Assert.assertTrue(isReady);
  final List<DataSegment> segments = Lists.newArrayList();
  mergeTask.run(new TaskToolbox(
      null,
      null,
      new TaskActionClient()
      {
        @Override
        public <RetType> RetType submit(TaskAction<RetType> taskAction) throws IOException
        {
          if (taskAction instanceof LockListAction) {
            Assert.assertNotNull("taskLock should be acquired before list", taskLock);
            return (RetType) Arrays.asList(taskLock);
          }
          if (taskAction instanceof SegmentListUsedAction) {
            List<DataSegment> segments = ImmutableList.of(
                DataSegment.builder().dataSource(mergeTask.getDataSource()).interval(new Interval("2010-01-01/PT1H"))
                           .version("oldVersion").shardSpec(new LinearShardSpec(0)).build(),
                DataSegment.builder().dataSource(mergeTask.getDataSource()).interval(new Interval("2010-01-01/PT1H"))
                           .version("oldVersion").shardSpec(new LinearShardSpec(0)).build(),
                DataSegment.builder().dataSource(mergeTask.getDataSource()).interval(new Interval("2010-01-01/PT2H"))
                           .version("oldVersion").shardSpec(new LinearShardSpec(0)).build()
            );
            return (RetType) segments;
          }
          if (taskAction instanceof SegmentInsertAction) {
            publishCountDown.countDown();
            return null;
          }
          return null;
        }
      },
      new NoopServiceEmitter(),
      new DataSegmentPusher()
      {
        @Deprecated
        @Override
        public String getPathForHadoop(String dataSource)
        {
          return getPathForHadoop();
        }

        @Override
        public String getPathForHadoop()
        {
          return null;
        }

        @Override
        public DataSegment push(File file, DataSegment segment) throws IOException
        {
          // the merged segment is pushed to storage
          segments.add(segment);
          return segment;
        }
      },
      null, null, null, null, null, null, null, null,
      new SegmentLoader()
      {
        @Override
        public boolean isSegmentLoaded(DataSegment segment) throws SegmentLoadingException
        {
          return false;
        }

        @Override
        public Segment getSegment(DataSegment segment) throws SegmentLoadingException
        {
          return null;
        }

        @Override
        public File getSegmentFiles(DataSegment segment) throws SegmentLoadingException
        {
          // dummy file to represent the downloaded segment's dir
          return new File("" + segment.getShardSpec().getPartitionNum());
        }

        @Override
        public void cleanup(DataSegment segment) throws SegmentLoadingException
        {
        }
      },
      jsonMapper,
      temporaryFolder.newFolder(),
      EasyMock.createMock(IndexMerger.class),
      indexIO,
      null,
      null,
      EasyMock.createMock(IndexMergerV9.class)
  ));
  return segments;
}
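The test drives the task with a hand-rolled TaskActionClient that returns a canned answer per action type. The same idea as a self-contained stub, with illustrative values rather than the test's own; the unchecked cast mirrors the pattern used above:

// Minimal illustrative stub: answer SegmentListUsedAction with canned segments,
// everything else with null. Values below are examples, not taken from the test.
final TaskActionClient stubClient = new TaskActionClient()
{
  @Override
  @SuppressWarnings("unchecked")
  public <RetType> RetType submit(TaskAction<RetType> taskAction) throws IOException
  {
    if (taskAction instanceof SegmentListUsedAction) {
      return (RetType) ImmutableList.of(
          DataSegment.builder()
                     .dataSource("example_ds")
                     .interval(new Interval("2010-01-01/PT1H"))
                     .version("v0")
                     .shardSpec(new LinearShardSpec(0))
                     .build()
      );
    }
    return null;
  }
};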
Use of io.druid.indexing.common.actions.SegmentListUsedAction in project druid by druid-io.
In class IngestSegmentFirehoseFactoryTimelineTest, method constructorFeeder:
@Parameterized.Parameters(name = "{0}")
public static Collection<Object[]> constructorFeeder()
{
  final List<TestCase> testCases = ImmutableList.of(
      /* Adjacent segments */
      TC("2000/2000T02", 3, 7,
         DS("2000/2000T01", "v1", 0, IR("2000", 1), IR("2000T00:01", 2)),
         DS("2000T01/2000T02", "v1", 0, IR("2000T01", 4))),
      /* 1H segment overlaid on top of 2H segment */
      TC("2000/2000T02", 3, 7,
         DS("2000/2000T02", "v1", 0, IR("2000", 1), IR("2000T00:01", 2), IR("2000T01", 8)),
         DS("2000T01/2000T02", "v2", 0, IR("2000T01:01", 4))),
      /* 1H segment overlaid on top of 1D segment */
      TC("2000/2000-01-02", 4, 23,
         DS("2000/2000-01-02", "v1", 0, IR("2000", 1), IR("2000T00:01", 2), IR("2000T01", 8), IR("2000T02", 16)),
         DS("2000T01/2000T02", "v2", 0, IR("2000T01:01", 4))),
      /* Segment set with two segments for the same interval */
      TC("2000/2000T02", 4, 15,
         DS("2000/2000T02", "v1", 0, IR("2000", 1), IR("2000T00:01", 2), IR("2000T01", 8)),
         DS("2000/2000T02", "v1", 1, IR("2000T01:01", 4))),
      /* Segment wider than desired interval */
      TC("2000T01/2000T02", 1, 2,
         DS("2000/2000T03", "v1", 0, IR("2000", 1), IR("2000T01", 2), IR("2000T02", 4))),
      TC("2000T02/2000T04", 2, 12,
         DS("2000/2000T03", "v1", 0, IR("2000", 1), IR("2000T01", 2), IR("2000T02", 4)),
         DS("2000T03/2000T04", "v1", 0, IR("2000T03", 8)))
  );
  final List<Object[]> constructors = Lists.newArrayList();
  for (final TestCase testCase : testCases) {
    final TaskActionClient taskActionClient = new TaskActionClient()
    {
      @Override
      public <RetType> RetType submit(TaskAction<RetType> taskAction) throws IOException
      {
        if (taskAction instanceof SegmentListUsedAction) {
          // Expect the interval we asked for
          final SegmentListUsedAction action = (SegmentListUsedAction) taskAction;
          if (action.getIntervals().equals(ImmutableList.of(testCase.interval))) {
            return (RetType) ImmutableList.copyOf(testCase.segments);
          } else {
            throw new IllegalArgumentException("WTF");
          }
        } else {
          throw new UnsupportedOperationException();
        }
      }
    };
    SegmentHandoffNotifierFactory notifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class);
    EasyMock.replay(notifierFactory);
    final TaskToolboxFactory taskToolboxFactory = new TaskToolboxFactory(
        new TaskConfig(testCase.tmpDir.getAbsolutePath(), null, null, 50000, null, false, null, null),
        new TaskActionClientFactory()
        {
          @Override
          public TaskActionClient create(Task task)
          {
            return taskActionClient;
          }
        },
        new NoopServiceEmitter(),
        null, // segment pusher
        null, // segment killer
        null, // segment mover
        null, // segment archiver
        null, // segment announcer
        notifierFactory,
        null, // query runner factory conglomerate corporation unionized collective
        null, // query executor service
        null, // monitor scheduler
        new SegmentLoaderFactory(
            new SegmentLoaderLocalCacheManager(null, new SegmentLoaderConfig()
            {
              @Override
              public List<StorageLocationConfig> getLocations()
              {
                return Lists.newArrayList();
              }
            }, MAPPER)
        ),
        MAPPER,
        INDEX_MERGER,
        INDEX_IO,
        null,
        null,
        INDEX_MERGER_V9
    );
    final Injector injector = Guice.createInjector(new Module()
    {
      @Override
      public void configure(Binder binder)
      {
        binder.bind(TaskToolboxFactory.class).toInstance(taskToolboxFactory);
      }
    });
    final IngestSegmentFirehoseFactory factory = new IngestSegmentFirehoseFactory(
        DATA_SOURCE, testCase.interval, new NoopDimFilter(),
        Arrays.asList(DIMENSIONS), Arrays.asList(METRICS), injector, INDEX_IO
    );
    constructors.add(new Object[]{testCase.toString(), factory, testCase.tmpDir, testCase.expectedCount, testCase.expectedSum});
  }
  return constructors;
}
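Note that the stub above validates the request, not just the response: SegmentListUsedAction exposes the intervals it was built with through getIntervals(). The same guard as a reusable factory method; strictClient, expectedInterval, and cannedSegments are illustrative names:

// Illustrative factory: a TaskActionClient that only answers SegmentListUsedAction,
// and only when the action asks for exactly the expected interval.
static TaskActionClient strictClient(final Interval expectedInterval, final List<DataSegment> cannedSegments)
{
  return new TaskActionClient()
  {
    @Override
    @SuppressWarnings("unchecked")
    public <RetType> RetType submit(TaskAction<RetType> taskAction) throws IOException
    {
      if (!(taskAction instanceof SegmentListUsedAction)) {
        throw new UnsupportedOperationException("only SegmentListUsedAction is stubbed");
      }
      final SegmentListUsedAction action = (SegmentListUsedAction) taskAction;
      if (!action.getIntervals().equals(ImmutableList.of(expectedInterval))) {
        throw new IllegalArgumentException("unexpected intervals: " + action.getIntervals());
      }
      return (RetType) ImmutableList.copyOf(cannedSegments);
    }
  };
}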
Use of io.druid.indexing.common.actions.SegmentListUsedAction in project druid by druid-io.
In class ConvertSegmentTask, method run:
@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception
{
  final Iterable<DataSegment> segmentsToUpdate;
  if (segment == null) {
    final List<DataSegment> segments = toolbox.getTaskActionClient()
                                              .submit(new SegmentListUsedAction(getDataSource(), getInterval(), null));
    segmentsToUpdate = FunctionalIterable.create(segments).filter(
        new Predicate<DataSegment>()
        {
          @Override
          public boolean apply(DataSegment segment)
          {
            final Integer segmentVersion = segment.getBinaryVersion();
            if (!CURR_VERSION_INTEGER.equals(segmentVersion)) {
              return true;
            } else if (force) {
              log.info("Segment[%s] already at version[%s], forcing conversion", segment.getIdentifier(), segmentVersion);
              return true;
            } else {
              log.info("Skipping[%s], already version[%s]", segment.getIdentifier(), segmentVersion);
              return false;
            }
          }
        }
    );
  } else {
    log.info("I'm in a subless mood.");
    segmentsToUpdate = Collections.singleton(segment);
  }
  // Vestigial from a past time when this task spawned subtasks.
  for (final Task subTask : generateSubTasks(getGroupId(), segmentsToUpdate, indexSpec, force, validate, getContext())) {
    final TaskStatus status = subTask.run(toolbox);
    if (!status.isSuccess()) {
      return TaskStatus.fromCode(getId(), status.getStatusCode());
    }
  }
  return success();
}
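The filter above reduces to a single predicate: convert when the segment's binary version differs from the current one, or unconditionally when force is set. The same logic as a standalone method; needsConversion is an illustrative name, not Druid API:

// Illustrative predicate: should this segment be (re)converted?
static boolean needsConversion(DataSegment segment, Integer currentVersion, boolean force)
{
  final Integer segmentVersion = segment.getBinaryVersion();
  if (!currentVersion.equals(segmentVersion)) {
    // Older or unknown binary version: always convert.
    return true;
  }
  // Already at the current version: convert only when forced.
  return force;
}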
Use of io.druid.indexing.common.actions.SegmentListUsedAction in project druid by druid-io.
In class MergeTaskBase, method isReady:
/**
* Checks pre-existing segments in "context" to confirm that this merge query is valid. Specifically, confirm that
* we are operating on every segment that overlaps the chosen interval.
*/
@Override
public boolean isReady(TaskActionClient taskActionClient) throws Exception
{
  // Try to acquire lock
  if (!super.isReady(taskActionClient)) {
    return false;
  } else {
    final Function<DataSegment, String> toIdentifier = new Function<DataSegment, String>()
    {
      @Override
      public String apply(DataSegment dataSegment)
      {
        return dataSegment.getIdentifier();
      }
    };
    final Set<String> current = ImmutableSet.copyOf(
        Iterables.transform(
            taskActionClient.submit(new SegmentListUsedAction(getDataSource(), getInterval(), null)),
            toIdentifier
        )
    );
    final Set<String> requested = ImmutableSet.copyOf(Iterables.transform(segments, toIdentifier));

    final Set<String> missingFromRequested = Sets.difference(current, requested);
    if (!missingFromRequested.isEmpty()) {
      throw new ISE(
          "Merge is invalid: current segment(s) are not in the requested set: %s",
          Joiner.on(", ").join(missingFromRequested)
      );
    }

    final Set<String> missingFromCurrent = Sets.difference(requested, current);
    if (!missingFromCurrent.isEmpty()) {
      throw new ISE(
          "Merge is invalid: requested segment(s) are not in the current set: %s",
          Joiner.on(", ").join(missingFromCurrent)
      );
    }
    return true;
  }
}
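The validation is a two-way set comparison on segment identifiers: the used segments reported by SegmentListUsedAction and the segments named in the task spec must match exactly, in both directions. The same check as a compact standalone helper; validateSameSet is an illustrative name:

// Illustrative helper: `current` (from SegmentListUsedAction) and `requested` (from the
// task spec) must contain exactly the same segment identifiers, or the merge is invalid.
static void validateSameSet(Set<String> current, Set<String> requested)
{
  final Set<String> missingFromRequested = Sets.difference(current, requested);
  if (!missingFromRequested.isEmpty()) {
    throw new ISE("Merge is invalid: current segment(s) are not in the requested set: %s",
                  Joiner.on(", ").join(missingFromRequested));
  }
  final Set<String> missingFromCurrent = Sets.difference(requested, current);
  if (!missingFromCurrent.isEmpty()) {
    throw new ISE("Merge is invalid: requested segment(s) are not in the current set: %s",
                  Joiner.on(", ").join(missingFromCurrent));
  }
}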