Search in sources :

Example 1 with SegmentListUsedAction

use of io.druid.indexing.common.actions.SegmentListUsedAction in project druid by druid-io.

the class IngestSegmentFirehoseFactory method connect.

@Override
public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException {
    log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
    if (taskToolbox == null) {
        // Noop Task is just used to create the toolbox and list segments.
        taskToolbox = injector.getInstance(TaskToolboxFactory.class).build(new NoopTask("reingest", 0, 0, null, null, null));
    }
    try {
        final List<DataSegment> usedSegments = taskToolbox.getTaskActionClient().submit(new SegmentListUsedAction(dataSource, interval, null));
        final Map<DataSegment, File> segmentFileMap = taskToolbox.fetchSegments(usedSegments);
        VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.<String>natural().nullsFirst());
        for (DataSegment segment : usedSegments) {
            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
        }
        final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
        final List<String> dims;
        if (dimensions != null) {
            dims = dimensions;
        } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
            dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
        } else {
            Set<String> dimSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {

                @Override
                public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> timelineObjectHolder) {
                    return Iterables.concat(Iterables.transform(timelineObjectHolder.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {

                        @Override
                        public Iterable<String> apply(PartitionChunk<DataSegment> input) {
                            return input.getObject().getDimensions();
                        }
                    }));
                }
            })));
            dims = Lists.newArrayList(Sets.difference(dimSet, inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions()));
        }
        final List<String> metricsList;
        if (metrics != null) {
            metricsList = metrics;
        } else {
            Set<String> metricsSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {

                @Override
                public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> input) {
                    return Iterables.concat(Iterables.transform(input.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {

                        @Override
                        public Iterable<String> apply(PartitionChunk<DataSegment> input) {
                            return input.getObject().getMetrics();
                        }
                    }));
                }
            })));
            metricsList = Lists.newArrayList(metricsSet);
        }
        final List<WindowedStorageAdapter> adapters = Lists.newArrayList(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>() {

            @Override
            public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder) {
                return Iterables.transform(holder.getObject(), new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>() {

                    @Override
                    public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input) {
                        final DataSegment segment = input.getObject();
                        try {
                            return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getIdentifier()))), holder.getInterval());
                        } catch (IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                });
            }
        })));
        return new IngestSegmentFirehose(adapters, dims, metricsList, dimFilter, Granularities.NONE);
    } catch (IOException e) {
        throw Throwables.propagate(e);
    } catch (SegmentLoadingException e) {
        throw Throwables.propagate(e);
    }
}
Also used : IngestSegmentFirehose(io.druid.segment.realtime.firehose.IngestSegmentFirehose) Set(java.util.Set) NoopTask(io.druid.indexing.common.task.NoopTask) DataSegment(io.druid.timeline.DataSegment) Function(com.google.common.base.Function) PartitionChunk(io.druid.timeline.partition.PartitionChunk) WindowedStorageAdapter(io.druid.segment.realtime.firehose.WindowedStorageAdapter) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) IOException(java.io.IOException) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) SegmentListUsedAction(io.druid.indexing.common.actions.SegmentListUsedAction) File(java.io.File)

Example 2 with SegmentListUsedAction

use of io.druid.indexing.common.actions.SegmentListUsedAction in project druid by druid-io.

the class SameIntervalMergeTaskTest method runTask.

private List<DataSegment> runTask(final SameIntervalMergeTask mergeTask, final String version) throws Exception {
    boolean isReady = mergeTask.isReady(new TaskActionClient() {

        @Override
        public <RetType> RetType submit(TaskAction<RetType> taskAction) throws IOException {
            if (taskAction instanceof LockTryAcquireAction) {
                // the lock of this interval is required
                Assert.assertEquals(mergeTask.getInterval(), ((LockTryAcquireAction) taskAction).getInterval());
                isRedayCountDown.countDown();
                taskLock = new TaskLock(mergeTask.getGroupId(), mergeTask.getDataSource(), mergeTask.getInterval(), version);
                return (RetType) taskLock;
            }
            return null;
        }
    });
    // ensure LockTryAcquireAction is submitted
    Assert.assertTrue(isReady);
    final List<DataSegment> segments = Lists.newArrayList();
    mergeTask.run(new TaskToolbox(null, null, new TaskActionClient() {

        @Override
        public <RetType> RetType submit(TaskAction<RetType> taskAction) throws IOException {
            if (taskAction instanceof LockListAction) {
                Assert.assertNotNull("taskLock should be acquired before list", taskLock);
                return (RetType) Arrays.asList(taskLock);
            }
            if (taskAction instanceof SegmentListUsedAction) {
                List<DataSegment> segments = ImmutableList.of(DataSegment.builder().dataSource(mergeTask.getDataSource()).interval(new Interval("2010-01-01/PT1H")).version("oldVersion").shardSpec(new LinearShardSpec(0)).build(), DataSegment.builder().dataSource(mergeTask.getDataSource()).interval(new Interval("2010-01-01/PT1H")).version("oldVersion").shardSpec(new LinearShardSpec(0)).build(), DataSegment.builder().dataSource(mergeTask.getDataSource()).interval(new Interval("2010-01-01/PT2H")).version("oldVersion").shardSpec(new LinearShardSpec(0)).build());
                return (RetType) segments;
            }
            if (taskAction instanceof SegmentInsertAction) {
                publishCountDown.countDown();
                return null;
            }
            return null;
        }
    }, new NoopServiceEmitter(), new DataSegmentPusher() {

        @Deprecated
        @Override
        public String getPathForHadoop(String dataSource) {
            return getPathForHadoop();
        }

        @Override
        public String getPathForHadoop() {
            return null;
        }

        @Override
        public DataSegment push(File file, DataSegment segment) throws IOException {
            // the merged segment is pushed to storage
            segments.add(segment);
            return segment;
        }
    }, null, null, null, null, null, null, null, null, new SegmentLoader() {

        @Override
        public boolean isSegmentLoaded(DataSegment segment) throws SegmentLoadingException {
            return false;
        }

        @Override
        public Segment getSegment(DataSegment segment) throws SegmentLoadingException {
            return null;
        }

        @Override
        public File getSegmentFiles(DataSegment segment) throws SegmentLoadingException {
            // dummy file to represent the downloaded segment's dir
            return new File("" + segment.getShardSpec().getPartitionNum());
        }

        @Override
        public void cleanup(DataSegment segment) throws SegmentLoadingException {
        }
    }, jsonMapper, temporaryFolder.newFolder(), EasyMock.createMock(IndexMerger.class), indexIO, null, null, EasyMock.createMock(IndexMergerV9.class)));
    return segments;
}
Also used : LockListAction(io.druid.indexing.common.actions.LockListAction) DataSegmentPusher(io.druid.segment.loading.DataSegmentPusher) LockTryAcquireAction(io.druid.indexing.common.actions.LockTryAcquireAction) TaskAction(io.druid.indexing.common.actions.TaskAction) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) NoopServiceEmitter(io.druid.server.metrics.NoopServiceEmitter) IOException(java.io.IOException) DataSegment(io.druid.timeline.DataSegment) SegmentLoader(io.druid.segment.loading.SegmentLoader) TaskToolbox(io.druid.indexing.common.TaskToolbox) TaskActionClient(io.druid.indexing.common.actions.TaskActionClient) TaskLock(io.druid.indexing.common.TaskLock) SegmentInsertAction(io.druid.indexing.common.actions.SegmentInsertAction) SegmentListUsedAction(io.druid.indexing.common.actions.SegmentListUsedAction) File(java.io.File) Interval(org.joda.time.Interval)

Example 3 with SegmentListUsedAction

use of io.druid.indexing.common.actions.SegmentListUsedAction in project druid by druid-io.

the class IngestSegmentFirehoseFactoryTimelineTest method constructorFeeder.

@Parameterized.Parameters(name = "{0}")
public static Collection<Object[]> constructorFeeder() {
    final List<TestCase> testCases = ImmutableList.of(TC("2000/2000T02", 3, 7, DS("2000/2000T01", "v1", 0, IR("2000", 1), IR("2000T00:01", 2)), DS("2000T01/2000T02", "v1", 0, IR("2000T01", 4))), /* Adjacent segments */
    TC("2000/2000T02", 3, 7, DS("2000/2000T02", "v1", 0, IR("2000", 1), IR("2000T00:01", 2), IR("2000T01", 8)), DS("2000T01/2000T02", "v2", 0, IR("2000T01:01", 4))), /* 1H segment overlaid on top of 2H segment */
    TC("2000/2000-01-02", 4, 23, DS("2000/2000-01-02", "v1", 0, IR("2000", 1), IR("2000T00:01", 2), IR("2000T01", 8), IR("2000T02", 16)), DS("2000T01/2000T02", "v2", 0, IR("2000T01:01", 4))), /* 1H segment overlaid on top of 1D segment */
    TC("2000/2000T02", 4, 15, DS("2000/2000T02", "v1", 0, IR("2000", 1), IR("2000T00:01", 2), IR("2000T01", 8)), DS("2000/2000T02", "v1", 1, IR("2000T01:01", 4))), /* Segment set with two segments for the same interval */
    TC("2000T01/2000T02", 1, 2, DS("2000/2000T03", "v1", 0, IR("2000", 1), IR("2000T01", 2), IR("2000T02", 4))), /* Segment wider than desired interval */
    TC("2000T02/2000T04", 2, 12, DS("2000/2000T03", "v1", 0, IR("2000", 1), IR("2000T01", 2), IR("2000T02", 4)), DS("2000T03/2000T04", "v1", 0, IR("2000T03", 8))));
    final List<Object[]> constructors = Lists.newArrayList();
    for (final TestCase testCase : testCases) {
        final TaskActionClient taskActionClient = new TaskActionClient() {

            @Override
            public <RetType> RetType submit(TaskAction<RetType> taskAction) throws IOException {
                if (taskAction instanceof SegmentListUsedAction) {
                    // Expect the interval we asked for
                    final SegmentListUsedAction action = (SegmentListUsedAction) taskAction;
                    if (action.getIntervals().equals(ImmutableList.of(testCase.interval))) {
                        return (RetType) ImmutableList.copyOf(testCase.segments);
                    } else {
                        throw new IllegalArgumentException("WTF");
                    }
                } else {
                    throw new UnsupportedOperationException();
                }
            }
        };
        SegmentHandoffNotifierFactory notifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class);
        EasyMock.replay(notifierFactory);
        final TaskToolboxFactory taskToolboxFactory = new TaskToolboxFactory(new TaskConfig(testCase.tmpDir.getAbsolutePath(), null, null, 50000, null, false, null, null), new TaskActionClientFactory() {

            @Override
            public TaskActionClient create(Task task) {
                return taskActionClient;
            }
        }, new NoopServiceEmitter(), // segment pusher
        null, // segment killer
        null, // segment mover
        null, // segment archiver
        null, // segment announcer,
        null, notifierFactory, // query runner factory conglomerate corporation unionized collective
        null, // query executor service
        null, // monitor scheduler
        null, new SegmentLoaderFactory(new SegmentLoaderLocalCacheManager(null, new SegmentLoaderConfig() {

            @Override
            public List<StorageLocationConfig> getLocations() {
                return Lists.newArrayList();
            }
        }, MAPPER)), MAPPER, INDEX_MERGER, INDEX_IO, null, null, INDEX_MERGER_V9);
        final Injector injector = Guice.createInjector(new Module() {

            @Override
            public void configure(Binder binder) {
                binder.bind(TaskToolboxFactory.class).toInstance(taskToolboxFactory);
            }
        });
        final IngestSegmentFirehoseFactory factory = new IngestSegmentFirehoseFactory(DATA_SOURCE, testCase.interval, new NoopDimFilter(), Arrays.asList(DIMENSIONS), Arrays.asList(METRICS), injector, INDEX_IO);
        constructors.add(new Object[] { testCase.toString(), factory, testCase.tmpDir, testCase.expectedCount, testCase.expectedSum });
    }
    return constructors;
}
Also used : Task(io.druid.indexing.common.task.Task) NoopDimFilter(io.druid.query.filter.NoopDimFilter) TaskAction(io.druid.indexing.common.actions.TaskAction) TaskActionClientFactory(io.druid.indexing.common.actions.TaskActionClientFactory) TaskConfig(io.druid.indexing.common.config.TaskConfig) Binder(com.google.inject.Binder) TaskToolboxFactory(io.druid.indexing.common.TaskToolboxFactory) Injector(com.google.inject.Injector) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) SegmentLoaderConfig(io.druid.segment.loading.SegmentLoaderConfig) SegmentLoaderFactory(io.druid.indexing.common.SegmentLoaderFactory) SegmentLoaderLocalCacheManager(io.druid.segment.loading.SegmentLoaderLocalCacheManager) NoopServiceEmitter(io.druid.server.metrics.NoopServiceEmitter) SegmentHandoffNotifierFactory(io.druid.segment.realtime.plumber.SegmentHandoffNotifierFactory) TaskActionClient(io.druid.indexing.common.actions.TaskActionClient) SegmentListUsedAction(io.druid.indexing.common.actions.SegmentListUsedAction) Module(com.google.inject.Module)

Example 4 with SegmentListUsedAction

use of io.druid.indexing.common.actions.SegmentListUsedAction in project druid by druid-io.

the class ConvertSegmentTask method run.

@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
    final Iterable<DataSegment> segmentsToUpdate;
    if (segment == null) {
        final List<DataSegment> segments = toolbox.getTaskActionClient().submit(new SegmentListUsedAction(getDataSource(), getInterval(), null));
        segmentsToUpdate = FunctionalIterable.create(segments).filter(new Predicate<DataSegment>() {

            @Override
            public boolean apply(DataSegment segment) {
                final Integer segmentVersion = segment.getBinaryVersion();
                if (!CURR_VERSION_INTEGER.equals(segmentVersion)) {
                    return true;
                } else if (force) {
                    log.info("Segment[%s] already at version[%s], forcing conversion", segment.getIdentifier(), segmentVersion);
                    return true;
                } else {
                    log.info("Skipping[%s], already version[%s]", segment.getIdentifier(), segmentVersion);
                    return false;
                }
            }
        });
    } else {
        log.info("I'm in a subless mood.");
        segmentsToUpdate = Collections.singleton(segment);
    }
    // Vestigial from a past time when this task spawned subtasks.
    for (final Task subTask : generateSubTasks(getGroupId(), segmentsToUpdate, indexSpec, force, validate, getContext())) {
        final TaskStatus status = subTask.run(toolbox);
        if (!status.isSuccess()) {
            return TaskStatus.fromCode(getId(), status.getStatusCode());
        }
    }
    return success();
}
Also used : SegmentListUsedAction(io.druid.indexing.common.actions.SegmentListUsedAction) TaskStatus(io.druid.indexing.common.TaskStatus) DataSegment(io.druid.timeline.DataSegment) Predicate(com.google.common.base.Predicate)

Example 5 with SegmentListUsedAction

use of io.druid.indexing.common.actions.SegmentListUsedAction in project druid by druid-io.

the class MergeTaskBase method isReady.

/**
   * Checks pre-existing segments in "context" to confirm that this merge query is valid. Specifically, confirm that
   * we are operating on every segment that overlaps the chosen interval.
   */
@Override
public boolean isReady(TaskActionClient taskActionClient) throws Exception {
    // Try to acquire lock
    if (!super.isReady(taskActionClient)) {
        return false;
    } else {
        final Function<DataSegment, String> toIdentifier = new Function<DataSegment, String>() {

            @Override
            public String apply(DataSegment dataSegment) {
                return dataSegment.getIdentifier();
            }
        };
        final Set<String> current = ImmutableSet.copyOf(Iterables.transform(taskActionClient.submit(new SegmentListUsedAction(getDataSource(), getInterval(), null)), toIdentifier));
        final Set<String> requested = ImmutableSet.copyOf(Iterables.transform(segments, toIdentifier));
        final Set<String> missingFromRequested = Sets.difference(current, requested);
        if (!missingFromRequested.isEmpty()) {
            throw new ISE("Merge is invalid: current segment(s) are not in the requested set: %s", Joiner.on(", ").join(missingFromRequested));
        }
        final Set<String> missingFromCurrent = Sets.difference(requested, current);
        if (!missingFromCurrent.isEmpty()) {
            throw new ISE("Merge is invalid: requested segment(s) are not in the current set: %s", Joiner.on(", ").join(missingFromCurrent));
        }
        return true;
    }
}
Also used : Function(com.google.common.base.Function) ISE(io.druid.java.util.common.ISE) SegmentListUsedAction(io.druid.indexing.common.actions.SegmentListUsedAction) DataSegment(io.druid.timeline.DataSegment)

Aggregations

SegmentListUsedAction (io.druid.indexing.common.actions.SegmentListUsedAction)9 DataSegment (io.druid.timeline.DataSegment)8 TaskActionClient (io.druid.indexing.common.actions.TaskActionClient)4 Function (com.google.common.base.Function)3 File (java.io.File)3 Interval (org.joda.time.Interval)3 TaskStatus (io.druid.indexing.common.TaskStatus)2 SegmentInsertAction (io.druid.indexing.common.actions.SegmentInsertAction)2 TaskAction (io.druid.indexing.common.actions.TaskAction)2 SegmentIdentifier (io.druid.segment.realtime.appenderator.SegmentIdentifier)2 NoopServiceEmitter (io.druid.server.metrics.NoopServiceEmitter)2 IOException (java.io.IOException)2 Set (java.util.Set)2 Predicate (com.google.common.base.Predicate)1 ImmutableList (com.google.common.collect.ImmutableList)1 Binder (com.google.inject.Binder)1 Injector (com.google.inject.Injector)1 Module (com.google.inject.Module)1 SegmentLoaderFactory (io.druid.indexing.common.SegmentLoaderFactory)1 TaskLock (io.druid.indexing.common.TaskLock)1