Example 1 with Pair

Use of io.druid.java.util.common.Pair in project druid by druid-io.

From the class ForkingTaskRunner (the same source file also defines QuotableWhiteSpaceSplitter), the method restore:

@Override
public List<Pair<Task, ListenableFuture<TaskStatus>>> restore() {
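    // The restore file records which tasks were running when the runner last shut down.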
    final File restoreFile = getRestoreFile();
    final TaskRestoreInfo taskRestoreInfo;
    if (restoreFile.exists()) {
        try {
            taskRestoreInfo = jsonMapper.readValue(restoreFile, TaskRestoreInfo.class);
        } catch (Exception e) {
            log.error(e, "Failed to read restorable tasks from file[%s]. Skipping restore.", restoreFile);
            return ImmutableList.of();
        }
    } else {
        return ImmutableList.of();
    }
    final List<Pair<Task, ListenableFuture<TaskStatus>>> retVal = Lists.newArrayList();
    for (final String taskId : taskRestoreInfo.getRunningTasks()) {
        try {
            final File taskFile = new File(taskConfig.getTaskDir(taskId), "task.json");
            final Task task = jsonMapper.readValue(taskFile, Task.class);
            if (!task.getId().equals(taskId)) {
                throw new ISE("WTF?! Task[%s] restore file had wrong id[%s].", taskId, task.getId());
            }
            if (taskConfig.isRestoreTasksOnRestart() && task.canRestore()) {
                log.info("Restoring task[%s].", task.getId());
                retVal.add(Pair.of(task, run(task)));
            }
        } catch (Exception e) {
            log.warn(e, "Failed to restore task[%s]. Trying to restore other tasks.", taskId);
        }
    }
    log.info("Restored %,d tasks.", retVal.size());
    return retVal;
}
Also used: Task (io.druid.indexing.common.task.Task), ISE (io.druid.java.util.common.ISE), TaskStatus (io.druid.indexing.common.TaskStatus), File (java.io.File), IOException (java.io.IOException), Pair (io.druid.java.util.common.Pair)
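Pair in io.druid.java.util.common is a bare value holder: a two-argument constructor, a static of() factory, and public final lhs/rhs fields, all of which this snippet uses. A minimal standalone sketch of that surface, assuming only the druid jar on the classpath (plain strings stand in for Task and for its status future):

import io.druid.java.util.common.Pair;

import java.util.ArrayList;
import java.util.List;

public class PairRestoreSketch {

    public static void main(String[] args) {
        // Mirrors the List<Pair<Task, ListenableFuture<TaskStatus>>> built in restore(),
        // with strings standing in for the task and its status future.
        final List<Pair<String, String>> restored = new ArrayList<>();
        restored.add(Pair.of("index_task_1", "RUNNING"));
        restored.add(new Pair<>("index_task_2", "RUNNING"));
        for (Pair<String, String> p : restored) {
            // lhs and rhs are public final fields; Pair exposes no getters.
            System.out.println(p.lhs + " => " + p.rhs);
        }
    }
}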

Example 2 with Pair

Use of io.druid.java.util.common.Pair in project druid by druid-io.

From the class RealtimeIndexTaskTest, the method testRestore:

@Test(timeout = 60_000L)
public void testRestore() throws Exception {
    final File directory = tempFolder.newFolder();
    final RealtimeIndexTask task1 = makeRealtimeTask(null);
    final DataSegment publishedSegment;
    // First run:
    {
        final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
        final TaskToolbox taskToolbox = makeToolbox(task1, mdc, directory);
        final ListenableFuture<TaskStatus> statusFuture = runTask(task1, taskToolbox);
        // Wait for firehose to show up, it starts off null.
        while (task1.getFirehose() == null) {
            Thread.sleep(50);
        }
        final TestFirehose firehose = (TestFirehose) task1.getFirehose();
        firehose.addRows(ImmutableList.<InputRow>of(new MapBasedInputRow(now, ImmutableList.of("dim1"), ImmutableMap.<String, Object>of("dim1", "foo"))));
        // Trigger graceful shutdown.
        task1.stopGracefully();
        // Wait for the task to finish. The status doesn't really matter, but we'll check it anyway.
        final TaskStatus taskStatus = statusFuture.get();
        Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
        // Nothing should be published.
        Assert.assertEquals(Sets.newHashSet(), mdc.getPublished());
    }
    // Second run:
    {
        final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
        final RealtimeIndexTask task2 = makeRealtimeTask(task1.getId());
        final TaskToolbox taskToolbox = makeToolbox(task2, mdc, directory);
        final ListenableFuture<TaskStatus> statusFuture = runTask(task2, taskToolbox);
        // Wait for firehose to show up, it starts off null.
        while (task2.getFirehose() == null) {
            Thread.sleep(50);
        }
        // Do a query, at this point the previous data should be loaded.
        Assert.assertEquals(1, sumMetric(task2, "rows"));
        final TestFirehose firehose = (TestFirehose) task2.getFirehose();
        firehose.addRows(ImmutableList.<InputRow>of(new MapBasedInputRow(now, ImmutableList.of("dim2"), ImmutableMap.<String, Object>of("dim2", "bar"))));
        // Stop the firehose, this will drain out existing events.
        firehose.close();
        // Wait for publish.
        while (mdc.getPublished().isEmpty()) {
            Thread.sleep(50);
        }
        publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
        // Do a query.
        Assert.assertEquals(2, sumMetric(task2, "rows"));
        // Simulate handoff.
        for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
            final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
            Assert.assertEquals(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()), entry.getKey());
            executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
        }
        handOffCallbacks.clear();
        // Wait for the task to finish.
        final TaskStatus taskStatus = statusFuture.get();
        Assert.assertEquals(TaskStatus.Status.SUCCESS, taskStatus.getStatusCode());
    }
}
Also used: TaskStatus (io.druid.indexing.common.TaskStatus), DataSegment (io.druid.timeline.DataSegment), TaskToolbox (io.druid.indexing.common.TaskToolbox), Executor (java.util.concurrent.Executor), TestIndexerMetadataStorageCoordinator (io.druid.indexing.test.TestIndexerMetadataStorageCoordinator), SegmentDescriptor (io.druid.query.SegmentDescriptor), MapBasedInputRow (io.druid.data.input.MapBasedInputRow), InputRow (io.druid.data.input.InputRow), ListenableFuture (com.google.common.util.concurrent.ListenableFuture), File (java.io.File), Pair (io.druid.java.util.common.Pair), Test (org.junit.Test)
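The handoff simulation above stores each callback as a Pair of the Executor to run it on (lhs) and the Runnable to run (rhs). A standalone sketch of that pattern, assuming only the druid jar; the String key "segment-1" is a hypothetical stand-in for Druid's SegmentDescriptor:

import io.druid.java.util.common.Pair;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Executor;

public class HandoffCallbackSketch {

    public static void main(String[] args) {
        // Each registered callback couples the executor (lhs) with the work to run on it (rhs).
        final Map<String, Pair<Executor, Runnable>> handOffCallbacks = new HashMap<>();
        final Executor sameThread = Runnable::run;  // direct executor: runs on the calling thread
        final Runnable onHandoff = () -> System.out.println("handoff complete");
        handOffCallbacks.put("segment-1", Pair.of(sameThread, onHandoff));
        // Simulate handoff the same way the test does: run each Runnable on its Executor.
        for (Pair<Executor, Runnable> callback : handOffCallbacks.values()) {
            callback.lhs.execute(callback.rhs);
        }
        handOffCallbacks.clear();
    }
}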

Example 3 with Pair

Use of io.druid.java.util.common.Pair in project druid by druid-io.

From the class OverlordResource, the method securedTaskRunnerWorkItem:

private Collection<? extends TaskRunnerWorkItem> securedTaskRunnerWorkItem(Collection<? extends TaskRunnerWorkItem> collectionToFilter, HttpServletRequest req) {
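    // Memoize the authorization decision per (resource, action) pair so each datasource is checked at most once per call.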
    final Map<Pair<Resource, Action>, Access> resourceAccessMap = new HashMap<>();
    final AuthorizationInfo authorizationInfo = (AuthorizationInfo) req.getAttribute(AuthConfig.DRUID_AUTH_TOKEN);
    return Collections2.filter(collectionToFilter, new Predicate<TaskRunnerWorkItem>() {

        @Override
        public boolean apply(TaskRunnerWorkItem input) {
            final String taskId = input.getTaskId();
            final Optional<Task> optionalTask = taskStorageQueryAdapter.getTask(taskId);
            if (!optionalTask.isPresent()) {
                throw new WebApplicationException(Response.serverError().entity(String.format("No task information found for task with id: [%s]", taskId)).build());
            }
            Resource resource = new Resource(optionalTask.get().getDataSource(), ResourceType.DATASOURCE);
            Action action = Action.READ;
            Pair<Resource, Action> key = new Pair<>(resource, action);
            if (resourceAccessMap.containsKey(key)) {
                return resourceAccessMap.get(key).isAllowed();
            } else {
                Access access = authorizationInfo.isAuthorized(key.lhs, key.rhs);
                resourceAccessMap.put(key, access);
                return access.isAllowed();
            }
        }
    });
}
Also used: Action (io.druid.server.security.Action), TaskRunnerWorkItem (io.druid.indexing.overlord.TaskRunnerWorkItem), Optional (com.google.common.base.Optional), WebApplicationException (javax.ws.rs.WebApplicationException), HashMap (java.util.HashMap), Access (io.druid.server.security.Access), Resource (io.druid.server.security.Resource), AuthorizationInfo (io.druid.server.security.AuthorizationInfo), Pair (io.druid.java.util.common.Pair)
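The containsKey/get lookups work because Pair implements value-based equals() and hashCode() over lhs and rhs, which is what makes it usable as a composite HashMap key. A minimal sketch of that property, assuming only the druid jar (the datasource and action strings are made up):

import io.druid.java.util.common.Pair;

import java.util.HashMap;
import java.util.Map;

public class PairAsMapKeySketch {

    public static void main(String[] args) {
        // Two separately constructed pairs with equal components hit the same map entry.
        final Map<Pair<String, String>, Boolean> accessCache = new HashMap<>();
        accessCache.put(new Pair<>("wikipedia", "READ"), true);
        System.out.println(accessCache.get(new Pair<>("wikipedia", "READ")));          // true
        System.out.println(accessCache.containsKey(new Pair<>("wikipedia", "WRITE"))); // false
    }
}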

Example 4 with Pair

Use of io.druid.java.util.common.Pair in project druid by druid-io.

From the class TaskLockbox, the method syncFromStorage:

/**
   * Wipe out our current in-memory state and resync it from our bundled {@link io.druid.indexing.overlord.TaskStorage}.
   */
public void syncFromStorage() {
    giant.lock();
    try {
        // Load stuff from taskStorage first. If this fails, we don't want to lose all our locks.
        final Set<String> storedActiveTasks = Sets.newHashSet();
        final List<Pair<Task, TaskLock>> storedLocks = Lists.newArrayList();
        for (final Task task : taskStorage.getActiveTasks()) {
            storedActiveTasks.add(task.getId());
            for (final TaskLock taskLock : taskStorage.getLocks(task.getId())) {
                storedLocks.add(Pair.of(task, taskLock));
            }
        }
        // Sort locks by version, so we add them back in the order they were acquired.
        final Ordering<Pair<Task, TaskLock>> byVersionOrdering = new Ordering<Pair<Task, TaskLock>>() {

            @Override
            public int compare(Pair<Task, TaskLock> left, Pair<Task, TaskLock> right) {
                // The second compare shouldn't be necessary, but, whatever.
                return ComparisonChain.start().compare(left.rhs.getVersion(), right.rhs.getVersion()).compare(left.lhs.getId(), right.lhs.getId()).result();
            }
        };
        running.clear();
        activeTasks.clear();
        activeTasks.addAll(storedActiveTasks);
        // Bookkeeping for a log message at the end
        int taskLockCount = 0;
        for (final Pair<Task, TaskLock> taskAndLock : byVersionOrdering.sortedCopy(storedLocks)) {
            final Task task = taskAndLock.lhs;
            final TaskLock savedTaskLock = taskAndLock.rhs;
            if (savedTaskLock.getInterval().toDurationMillis() <= 0) {
                // "Impossible", but you never know what crazy stuff can be restored from storage.
                log.warn("WTF?! Got lock with empty interval for task: %s", task.getId());
                continue;
            }
            final Optional<TaskLock> acquiredTaskLock = tryLock(task, savedTaskLock.getInterval(), Optional.of(savedTaskLock.getVersion()));
            if (acquiredTaskLock.isPresent() && savedTaskLock.getVersion().equals(acquiredTaskLock.get().getVersion())) {
                taskLockCount++;
                log.info("Reacquired lock on interval[%s] version[%s] for task: %s", savedTaskLock.getInterval(), savedTaskLock.getVersion(), task.getId());
            } else if (acquiredTaskLock.isPresent()) {
                taskLockCount++;
                log.info("Could not reacquire lock on interval[%s] version[%s] (got version[%s] instead) for task: %s", savedTaskLock.getInterval(), savedTaskLock.getVersion(), acquiredTaskLock.get().getVersion(), task.getId());
            } else {
                log.info("Could not reacquire lock on interval[%s] version[%s] for task: %s", savedTaskLock.getInterval(), savedTaskLock.getVersion(), task.getId());
            }
        }
        log.info("Synced %,d locks for %,d activeTasks from storage (%,d locks ignored).", taskLockCount, activeTasks.size(), storedLocks.size() - taskLockCount);
    } finally {
        giant.unlock();
    }
}
Also used: Task (io.druid.indexing.common.task.Task), TaskLock (io.druid.indexing.common.TaskLock), Ordering (com.google.common.collect.Ordering), Pair (io.druid.java.util.common.Pair)
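byVersionOrdering shows the usual way to sort pairs without unpacking them: compare on rhs first, then break ties on lhs. A standalone sketch of the same idea with Guava's Ordering and ComparisonChain, assuming the druid and guava jars (the task ids and version strings are invented):

import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Ordering;
import io.druid.java.util.common.Pair;

import java.util.Arrays;
import java.util.List;

public class PairOrderingSketch {

    public static void main(String[] args) {
        // Sort primarily by rhs (the "version"), breaking ties on lhs (the "task id"),
        // just like byVersionOrdering above.
        final Ordering<Pair<String, String>> byVersion = new Ordering<Pair<String, String>>() {

            @Override
            public int compare(Pair<String, String> left, Pair<String, String> right) {
                return ComparisonChain.start().compare(left.rhs, right.rhs).compare(left.lhs, right.lhs).result();
            }
        };
        final List<Pair<String, String>> locks = Arrays.asList(Pair.of("task-b", "v2"), Pair.of("task-a", "v1"), Pair.of("task-a", "v2"));
        // sortedCopy leaves the input untouched and returns a new sorted list.
        for (Pair<String, String> lock : byVersion.sortedCopy(locks)) {
            System.out.println(lock.lhs + " @ " + lock.rhs);
        }
    }
}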

Example 5 with Pair

Use of io.druid.java.util.common.Pair in project druid by druid-io.

From the class GroupByRowProcessor, the method process:

public static Sequence<Row> process(final Query queryParam, final Sequence<Row> rows, final Map<String, ValueType> rowSignature, final GroupByQueryConfig config, final GroupByQueryResource resource, final ObjectMapper spillMapper, final String processingTmpDir) {
    final GroupByQuery query = (GroupByQuery) queryParam;
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final AggregatorFactory[] aggregatorFactories = new AggregatorFactory[query.getAggregatorSpecs().size()];
    for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
        aggregatorFactories[i] = query.getAggregatorSpecs().get(i);
    }
    final File temporaryStorageDirectory = new File(processingTmpDir, String.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId()));
    final List<Interval> queryIntervals = query.getIntervals();
    final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimFilter()));
    final SettableSupplier<Row> rowSupplier = new SettableSupplier<>();
    final RowBasedColumnSelectorFactory columnSelectorFactory = RowBasedColumnSelectorFactory.create(rowSupplier, rowSignature);
    final ValueMatcher filterMatcher = filter == null ? BooleanValueMatcher.of(true) : filter.makeMatcher(columnSelectorFactory);
    final FilteredSequence<Row> filteredSequence = new FilteredSequence<>(rows, new Predicate<Row>() {

        @Override
        public boolean apply(Row input) {
            boolean inInterval = false;
            DateTime rowTime = input.getTimestamp();
            for (Interval queryInterval : queryIntervals) {
                if (queryInterval.contains(rowTime)) {
                    inInterval = true;
                    break;
                }
            }
            if (!inInterval) {
                return false;
            }
            rowSupplier.set(input);
            return filterMatcher.matches();
        }
    });
    return new BaseSequence<>(new BaseSequence.IteratorMaker<Row, CloseableGrouperIterator<RowBasedKey, Row>>() {

        @Override
        public CloseableGrouperIterator<RowBasedKey, Row> make() {
            // This holds all closeable objects, which are closed once the returned iterator
            // has iterated over all the elements or an exception is thrown. The objects are
            // closed in reverse order.
            final List<Closeable> closeOnExit = Lists.newArrayList();
            try {
                final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(temporaryStorageDirectory, querySpecificConfig.getMaxOnDiskStorage());
                closeOnExit.add(temporaryStorage);
                Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> pair = RowBasedGrouperHelper.createGrouperAccumulatorPair(query, true, rowSignature, querySpecificConfig, new Supplier<ByteBuffer>() {

                    @Override
                    public ByteBuffer get() {
                        final ResourceHolder<ByteBuffer> mergeBufferHolder = resource.getMergeBuffer();
                        closeOnExit.add(mergeBufferHolder);
                        return mergeBufferHolder.get();
                    }
                }, -1, temporaryStorage, spillMapper, aggregatorFactories);
                final Grouper<RowBasedKey> grouper = pair.lhs;
                final Accumulator<Grouper<RowBasedKey>, Row> accumulator = pair.rhs;
                closeOnExit.add(grouper);
                final Grouper<RowBasedKey> retVal = filteredSequence.accumulate(grouper, accumulator);
                if (retVal != grouper) {
                    throw GroupByQueryHelper.throwAccumulationResourceLimitExceededException();
                }
                return RowBasedGrouperHelper.makeGrouperIterator(grouper, query, new Closeable() {

                    @Override
                    public void close() throws IOException {
                        for (Closeable closeable : Lists.reverse(closeOnExit)) {
                            CloseQuietly.close(closeable);
                        }
                    }
                });
            } catch (Throwable e) {
                // Exception caught while setting up the iterator; release resources.
                for (Closeable closeable : Lists.reverse(closeOnExit)) {
                    CloseQuietly.close(closeable);
                }
                throw e;
            }
        }

        @Override
        public void cleanup(CloseableGrouperIterator<RowBasedKey, Row> iterFromMake) {
            iterFromMake.close();
        }
    });
}
Also used: Accumulator (io.druid.java.util.common.guava.Accumulator), Closeable (java.io.Closeable), RowBasedColumnSelectorFactory (io.druid.query.groupby.RowBasedColumnSelectorFactory), DateTime (org.joda.time.DateTime), GroupByQuery (io.druid.query.groupby.GroupByQuery), List (java.util.List), Supplier (com.google.common.base.Supplier), SettableSupplier (io.druid.common.guava.SettableSupplier), Pair (io.druid.java.util.common.Pair), BooleanValueMatcher (io.druid.segment.filter.BooleanValueMatcher), ValueMatcher (io.druid.query.filter.ValueMatcher), GroupByQueryConfig (io.druid.query.groupby.GroupByQueryConfig), FilteredSequence (io.druid.java.util.common.guava.FilteredSequence), RowBasedKey (io.druid.query.groupby.epinephelinae.RowBasedGrouperHelper.RowBasedKey), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), ByteBuffer (java.nio.ByteBuffer), BaseSequence (io.druid.java.util.common.guava.BaseSequence), Filter (io.druid.query.filter.Filter), Row (io.druid.data.input.Row), File (java.io.File), Interval (org.joda.time.Interval)
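createGrouperAccumulatorPair uses Pair as a two-value return: the grouper and the accumulator that feeds it come back from a single call and are immediately unpacked into pair.lhs and pair.rhs. A hypothetical analogue of that shape, assuming only the druid jar (the factory and its types are invented for illustration):

import io.druid.java.util.common.Pair;

import java.util.ArrayList;
import java.util.List;
import java.util.function.BiConsumer;

public class TwoValueReturnSketch {

    // Returns the mutable sink (lhs) together with the operation that feeds it (rhs),
    // in the style of RowBasedGrouperHelper.createGrouperAccumulatorPair.
    static Pair<List<String>, BiConsumer<List<String>, String>> createSinkAndAccumulator() {
        final List<String> sink = new ArrayList<>();
        final BiConsumer<List<String>, String> accumulator = List::add;
        return Pair.of(sink, accumulator);
    }

    public static void main(String[] args) {
        final Pair<List<String>, BiConsumer<List<String>, String>> pair = createSinkAndAccumulator();
        pair.rhs.accept(pair.lhs, "row-1");
        pair.rhs.accept(pair.lhs, "row-2");
        System.out.println(pair.lhs);  // [row-1, row-2]
    }
}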

Aggregations

Pair (io.druid.java.util.common.Pair): 62
Test (org.junit.Test): 26
Interval (org.joda.time.Interval): 15
DataSegment (io.druid.timeline.DataSegment): 11
Map (java.util.Map): 11
ByteBuffer (java.nio.ByteBuffer): 10
HashMap (java.util.HashMap): 9
SerializablePair (io.druid.collections.SerializablePair): 8
SegmentDescriptor (io.druid.query.SegmentDescriptor): 8
List (java.util.List): 8
ImmutableMap (com.google.common.collect.ImmutableMap): 7
Executor (java.util.concurrent.Executor): 7
DateTime (org.joda.time.DateTime): 7
Function (com.google.common.base.Function): 6
ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 6
MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 6
TaskStatus (io.druid.indexing.common.TaskStatus): 6
ISE (io.druid.java.util.common.ISE): 6
Access (io.druid.server.security.Access): 6
Action (io.druid.server.security.Action): 6