
Example 11 with Function

Use of com.google.common.base.Function in project druid by druid-io.

Class AppenderatorPlumber, method mergeAndPush.

private void mergeAndPush() {
    final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity();
    final Period windowPeriod = config.getWindowPeriod();
    final long windowMillis = windowPeriod.toStandardDuration().getMillis();
    log.info("Starting merge and push.");
    DateTime minTimestampAsDate = segmentGranularity.bucketStart(new DateTime(Math.max(windowMillis, rejectionPolicy.getCurrMaxTime().getMillis()) - windowMillis));
    long minTimestamp = minTimestampAsDate.getMillis();
    final List<SegmentIdentifier> appenderatorSegments = appenderator.getSegments();
    final List<SegmentIdentifier> segmentsToPush = Lists.newArrayList();
    if (shuttingDown) {
        log.info("Found [%,d] segments. Attempting to hand off all of them.", appenderatorSegments.size());
        segmentsToPush.addAll(appenderatorSegments);
    } else {
        log.info("Found [%,d] segments. Attempting to hand off segments that start before [%s].", appenderatorSegments.size(), minTimestampAsDate);
        for (SegmentIdentifier segment : appenderatorSegments) {
            final Long intervalStart = segment.getInterval().getStartMillis();
            if (intervalStart < minTimestamp) {
                log.info("Adding entry [%s] for merge and push.", segment);
                segmentsToPush.add(segment);
            } else {
                log.info("Skipping persist and merge for entry [%s] : Start time [%s] >= [%s] min timestamp required in this run. Segment will be picked up in a future run.", segment, new DateTime(intervalStart), minTimestampAsDate);
            }
        }
    }
    log.info("Found [%,d] segments to persist and merge", segmentsToPush.size());
    final Function<Throwable, Void> errorHandler = new Function<Throwable, Void>() {

        @Override
        public Void apply(Throwable throwable) {
            final List<String> segmentIdentifierStrings = Lists.transform(segmentsToPush, new Function<SegmentIdentifier, String>() {

                @Override
                public String apply(SegmentIdentifier input) {
                    return input.getIdentifierAsString();
                }
            });
            log.makeAlert(throwable, "Failed to publish merged indexes[%s]", schema.getDataSource()).addData("segments", segmentIdentifierStrings).emit();
            if (shuttingDown) {
                // We're trying to shut down, and these segments failed to push. Let's just get rid of them.
                // This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
                cleanShutdown = false;
                for (SegmentIdentifier identifier : segmentsToPush) {
                    dropSegment(identifier);
                }
            }
            return null;
        }
    };
    // WARNING: Committers.nil() here means that on-disk data can get out of sync with committing.
    Futures.addCallback(appenderator.push(segmentsToPush, Committers.nil()), new FutureCallback<SegmentsAndMetadata>() {

        @Override
        public void onSuccess(SegmentsAndMetadata result) {
            // Immediately publish after pushing
            for (DataSegment pushedSegment : result.getSegments()) {
                try {
                    segmentPublisher.publishSegment(pushedSegment);
                } catch (Exception e) {
                    errorHandler.apply(e);
                }
            }
            log.info("Published [%,d] sinks.", segmentsToPush.size());
        }

        @Override
        public void onFailure(Throwable e) {
            log.warn(e, "Failed to push [%,d] segments.", segmentsToPush.size());
            errorHandler.apply(e);
        }
    });
}
Also used: Period (org.joda.time.Period), Granularity (io.druid.java.util.common.granularity.Granularity), DataSegment (io.druid.timeline.DataSegment), DateTime (org.joda.time.DateTime), IndexSizeExceededException (io.druid.segment.incremental.IndexSizeExceededException), IOException (java.io.IOException), Function (com.google.common.base.Function)
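
Two Function usages appear in this method: an error handler typed as Function<Throwable, Void> that both callbacks invoke manually, and a Lists.transform that maps each SegmentIdentifier to its identifier string for the alert payload. As a minimal, self-contained sketch of that transform pattern (placeholder Integer/String types rather than Druid classes, so this is illustrative only), the anonymous class and its Java 8 lambda equivalent look like this:

import com.google.common.base.Function;
import com.google.common.collect.Lists;

import java.util.List;

public class TransformSketch {
    public static void main(String[] args) {
        List<Integer> ids = Lists.newArrayList(1, 2, 3);

        // Guava Function as an anonymous class, mirroring the pattern above.
        List<String> asStrings = Lists.transform(ids, new Function<Integer, String>() {
            @Override
            public String apply(Integer input) {
                return "segment-" + input;
            }
        });

        // On Java 8+ the same call site accepts a lambda, because
        // com.google.common.base.Function has a single abstract method.
        List<String> asStrings8 = Lists.transform(ids, input -> "segment-" + input);

        System.out.println(asStrings);   // [segment-1, segment-2, segment-3]
        System.out.println(asStrings8);  // [segment-1, segment-2, segment-3]
    }
}

Note that Lists.transform returns a lazy view, so the Function runs each time an element is read rather than eagerly copying the list.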

Example 12 with Function

Use of com.google.common.base.Function in project druid by druid-io.

Class RealtimeManager, method getQueryRunnerForSegments.

@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(final Query<T> query, final Iterable<SegmentDescriptor> specs) {
    final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query);
    final Map<Integer, FireChief> partitionChiefs = chiefs.get(Iterables.getOnlyElement(query.getDataSource().getNames()));
    return partitionChiefs == null ? new NoopQueryRunner<T>() : factory.getToolchest().mergeResults(factory.mergeRunners(MoreExecutors.sameThreadExecutor(), Iterables.transform(specs, new Function<SegmentDescriptor, QueryRunner<T>>() {

        @Override
        public QueryRunner<T> apply(SegmentDescriptor spec) {
            final FireChief retVal = partitionChiefs.get(spec.getPartitionNumber());
            return retVal == null ? new NoopQueryRunner<T>() : retVal.getQueryRunner(query.withQuerySegmentSpec(new SpecificSegmentSpec(spec)));
        }
    })));
}
Also used: Function (com.google.common.base.Function), Query (io.druid.query.Query), SpecificSegmentSpec (io.druid.query.spec.SpecificSegmentSpec), SegmentDescriptor (io.druid.query.SegmentDescriptor)
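
Here Iterables.transform wires one QueryRunner per requested SegmentDescriptor, and the transform is lazy: the Function only runs when the merged runner iterates the specs. A small, self-contained sketch of that laziness, with plain strings standing in for SegmentDescriptor and QueryRunner (assumed placeholders, not Druid types):

import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;

import java.util.List;

public class LazyTransformSketch {
    public static void main(String[] args) {
        List<String> specs = ImmutableList.of("spec-0", "spec-1", "spec-2");

        // Iterables.transform returns a lazy view: the Function runs only when
        // the result is iterated, so nothing is resolved up front.
        Iterable<String> runners = Iterables.transform(specs, new Function<String, String>() {
            @Override
            public String apply(String spec) {
                System.out.println("resolving runner for " + spec);
                return "runner(" + spec + ")";
            }
        });

        // The "resolving runner" lines only appear now, during iteration.
        for (String runner : runners) {
            System.out.println(runner);
        }
    }
}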

Example 13 with Function

Use of com.google.common.base.Function in project druid by druid-io.

Class AppenderatorImpl, method bootstrapSinksFromDisk.

/**
   * Populate "sinks" and "sinkTimeline" with committed segments, and announce them with the segmentAnnouncer.
   *
   * @return persisted commit metadata
   */
private Object bootstrapSinksFromDisk() {
    Preconditions.checkState(sinks.isEmpty(), "Already bootstrapped?!");
    final File baseDir = tuningConfig.getBasePersistDirectory();
    if (!baseDir.exists()) {
        return null;
    }
    final File[] files = baseDir.listFiles();
    if (files == null) {
        return null;
    }
    final File commitFile = computeCommitFile();
    final Committed committed;
    try {
        if (commitFile.exists()) {
            committed = objectMapper.readValue(commitFile, Committed.class);
        } else {
            committed = Committed.nil();
        }
    } catch (Exception e) {
        throw new ISE(e, "Failed to read commitFile: %s", commitFile);
    }
    log.info("Loading sinks from[%s]: %s", baseDir, committed.getHydrants().keySet());
    for (File sinkDir : files) {
        final File identifierFile = new File(sinkDir, IDENTIFIER_FILE_NAME);
        if (!identifierFile.isFile()) {
            // No identifier in this sinkDir; it must not actually be a sink directory. Skip it.
            continue;
        }
        try {
            final SegmentIdentifier identifier = objectMapper.readValue(new File(sinkDir, "identifier.json"), SegmentIdentifier.class);
            final int committedHydrants = committed.getCommittedHydrants(identifier.getIdentifierAsString());
            if (committedHydrants <= 0) {
                log.info("Removing uncommitted sink at [%s]", sinkDir);
                FileUtils.deleteDirectory(sinkDir);
                continue;
            }
            // To avoid reading and listing of "merged" dir and other special files
            final File[] sinkFiles = sinkDir.listFiles(new FilenameFilter() {

                @Override
                public boolean accept(File dir, String fileName) {
                    return !(Ints.tryParse(fileName) == null);
                }
            });
            Arrays.sort(sinkFiles, new Comparator<File>() {

                @Override
                public int compare(File o1, File o2) {
                    return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName()));
                }
            });
            List<FireHydrant> hydrants = Lists.newArrayList();
            for (File hydrantDir : sinkFiles) {
                final int hydrantNumber = Integer.parseInt(hydrantDir.getName());
                if (hydrantNumber >= committedHydrants) {
                    log.info("Removing uncommitted segment at [%s]", hydrantDir);
                    FileUtils.deleteDirectory(hydrantDir);
                } else {
                    log.info("Loading previously persisted segment at [%s]", hydrantDir);
                    if (hydrantNumber != hydrants.size()) {
                        throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
                    }
                    hydrants.add(new FireHydrant(new QueryableIndexSegment(identifier.getIdentifierAsString(), indexIO.loadIndex(hydrantDir)), hydrantNumber));
                }
            }
            // Make sure we loaded enough hydrants.
            if (committedHydrants != hydrants.size()) {
                throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
            }
            Sink currSink = new Sink(identifier.getInterval(), schema, identifier.getShardSpec(), identifier.getVersion(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions(), hydrants);
            sinks.put(identifier, currSink);
            sinkTimeline.add(currSink.getInterval(), currSink.getVersion(), identifier.getShardSpec().createChunk(currSink));
            segmentAnnouncer.announceSegment(currSink.getSegment());
        } catch (IOException e) {
            log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource()).addData("sinkDir", sinkDir).emit();
        }
    }
    // Make sure we loaded all committed sinks.
    final Set<String> loadedSinks = Sets.newHashSet(Iterables.transform(sinks.keySet(), new Function<SegmentIdentifier, String>() {

        @Override
        public String apply(SegmentIdentifier input) {
            return input.getIdentifierAsString();
        }
    }));
    final Set<String> missingSinks = Sets.difference(committed.getHydrants().keySet(), loadedSinks);
    if (!missingSinks.isEmpty()) {
        throw new ISE("Missing committed sinks [%s]", Joiner.on(", ").join(missingSinks));
    }
    return committed.getMetadata();
}
Also used: QueryableIndexSegment (io.druid.segment.QueryableIndexSegment), IOException (java.io.IOException), IndexSizeExceededException (io.druid.segment.incremental.IndexSizeExceededException), ExecutionException (java.util.concurrent.ExecutionException), FilenameFilter (java.io.FilenameFilter), Function (com.google.common.base.Function), Sink (io.druid.segment.realtime.plumber.Sink), ISE (io.druid.java.util.common.ISE), FireHydrant (io.druid.segment.realtime.FireHydrant), File (java.io.File)
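
The final consistency check maps every loaded SegmentIdentifier to its identifier string with a Function, collects the results into a Set, and diffs that against the committed identifiers. A minimal sketch of the same Iterables.transform plus Sets.difference shape, using hypothetical integer ids and string identifiers instead of Druid types:

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;

import java.util.Set;

public class MissingSinksSketch {
    public static void main(String[] args) {
        // Hypothetical stand-ins for the committed identifier strings and the sinks found on disk.
        Set<String> committed = ImmutableSet.of("1", "2", "3");
        Set<Integer> loadedIds = ImmutableSet.of(1, 2);

        // Same shape as the check above: map each loaded sink through a Function to its
        // identifier string, then diff against the committed set.
        Set<String> loaded = Sets.newHashSet(Iterables.transform(loadedIds, new Function<Integer, String>() {
            @Override
            public String apply(Integer input) {
                return String.valueOf(input);
            }
        }));

        Set<String> missing = Sets.difference(committed, loaded);
        if (!missing.isEmpty()) {
            // Prints: Missing committed sinks [3]
            System.out.println(String.format("Missing committed sinks [%s]", Joiner.on(", ").join(missing)));
        }
    }
}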

Example 14 with Function

Use of com.google.common.base.Function in project druid by druid-io.

Class AppenderatorImpl, method persistAll.

@Override
public ListenableFuture<Object> persistAll(final Committer committer) {
    // Submit persistAll task to the persistExecutor
    final Map<SegmentIdentifier, Integer> commitHydrants = Maps.newHashMap();
    final List<Pair<FireHydrant, SegmentIdentifier>> indexesToPersist = Lists.newArrayList();
    final Set<SegmentIdentifier> identifiers = sinks.keySet();
    for (SegmentIdentifier identifier : identifiers) {
        final Sink sink = sinks.get(identifier);
        final List<FireHydrant> hydrants = Lists.newArrayList(sink);
        commitHydrants.put(identifier, hydrants.size());
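        // If the sink is still writable, its last hydrant is the active in-memory index;
        // it is handled via swap() below rather than persisted here.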
        final int limit = sink.isWritable() ? hydrants.size() - 1 : hydrants.size();
        for (FireHydrant hydrant : hydrants.subList(0, limit)) {
            if (!hydrant.hasSwapped()) {
                log.info("Hydrant[%s] hasn't persisted yet, persisting. Segment[%s]", hydrant, identifier);
                indexesToPersist.add(Pair.of(hydrant, identifier));
            }
        }
        if (sink.swappable()) {
            indexesToPersist.add(Pair.of(sink.swap(), identifier));
        }
    }
    log.info("Submitting persist runnable for dataSource[%s]", schema.getDataSource());
    final String threadName = String.format("%s-incremental-persist", schema.getDataSource());
    final Object commitMetadata = committer.getMetadata();
    final Stopwatch runExecStopwatch = Stopwatch.createStarted();
    final Stopwatch persistStopwatch = Stopwatch.createStarted();
    final ListenableFuture<Object> future = persistExecutor.submit(new ThreadRenamingCallable<Object>(threadName) {

        @Override
        public Object doCall() {
            try {
                for (Pair<FireHydrant, SegmentIdentifier> pair : indexesToPersist) {
                    metrics.incrementRowOutputCount(persistHydrant(pair.lhs, pair.rhs));
                }
                log.info("Committing metadata[%s] for sinks[%s].", commitMetadata, Joiner.on(", ").join(Iterables.transform(commitHydrants.entrySet(), new Function<Map.Entry<SegmentIdentifier, Integer>, String>() {

                    @Override
                    public String apply(Map.Entry<SegmentIdentifier, Integer> entry) {
                        return String.format("%s:%d", entry.getKey().getIdentifierAsString(), entry.getValue());
                    }
                })));
                committer.run();
                objectMapper.writeValue(computeCommitFile(), Committed.create(commitHydrants, commitMetadata));
                return commitMetadata;
            } catch (Exception e) {
                metrics.incrementFailedPersists();
                throw Throwables.propagate(e);
            } finally {
                metrics.incrementNumPersists();
                metrics.incrementPersistTimeMillis(persistStopwatch.elapsed(TimeUnit.MILLISECONDS));
                persistStopwatch.stop();
            }
        }
    });
    final long startDelay = runExecStopwatch.elapsed(TimeUnit.MILLISECONDS);
    metrics.incrementPersistBackPressureMillis(startDelay);
    if (startDelay > WARN_DELAY) {
        log.warn("Ingestion was throttled for [%,d] millis because persists were pending.", startDelay);
    }
    runExecStopwatch.stop();
    resetNextFlush();
    // NB: The rows are still in memory until they're done persisting, but we only count rows in active indexes.
    rowsCurrentlyInMemory.set(0);
    return future;
}
Also used: Stopwatch (com.google.common.base.Stopwatch), IndexSizeExceededException (io.druid.segment.incremental.IndexSizeExceededException), IOException (java.io.IOException), ExecutionException (java.util.concurrent.ExecutionException), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), Function (com.google.common.base.Function), Sink (io.druid.segment.realtime.plumber.Sink), FireHydrant (io.druid.segment.realtime.FireHydrant), Map (java.util.Map), Pair (io.druid.java.util.common.Pair)
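
Inside the persist callable, the Function's only job is to render each (identifier, hydrant count) entry as "identifier:count" so that Joiner can glue the pieces into one log line. A standalone sketch of that formatting pattern over a hypothetical map of counts:

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;

import java.util.Map;

public class CommitLogLineSketch {
    public static void main(String[] args) {
        // Hypothetical hydrant counts keyed by segment identifier string.
        Map<String, Integer> commitHydrants = ImmutableMap.of("seg_2017-01-01", 3, "seg_2017-01-02", 1);

        // Same pattern as above: a Function formats each entry and Joiner glues
        // the pieces into a single human-readable string for the log message.
        String summary = Joiner.on(", ").join(Iterables.transform(commitHydrants.entrySet(),
                new Function<Map.Entry<String, Integer>, String>() {
                    @Override
                    public String apply(Map.Entry<String, Integer> entry) {
                        return String.format("%s:%d", entry.getKey(), entry.getValue());
                    }
                }));

        // Prints: seg_2017-01-01:3, seg_2017-01-02:1
        System.out.println(summary);
    }
}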

Example 15 with Function

Use of com.google.common.base.Function in project druid by druid-io.

Class DatasourcesResource, method getSegmentDataSourceSpecificInterval.

/**
   * Provides serverView for a datasource and Interval which gives details about servers hosting segments for an interval
   * Used by the realtime tasks to fetch a view of the interval they are interested in.
   */
@GET
@Path("/{dataSourceName}/intervals/{interval}/serverview")
@Produces(MediaType.APPLICATION_JSON)
@ResourceFilters(DatasourceResourceFilter.class)
public Response getSegmentDataSourceSpecificInterval(@PathParam("dataSourceName") String dataSourceName, @PathParam("interval") String interval, @QueryParam("partial") final boolean partial) {
    TimelineLookup<String, SegmentLoadInfo> timeline = serverInventoryView.getTimeline(new TableDataSource(dataSourceName));
    final Interval theInterval = new Interval(interval.replace("_", "/"));
    if (timeline == null) {
        log.debug("No timeline found for datasource[%s]", dataSourceName);
        return Response.ok(Lists.<ImmutableSegmentLoadInfo>newArrayList()).build();
    }
    Iterable<TimelineObjectHolder<String, SegmentLoadInfo>> lookup = timeline.lookupWithIncompletePartitions(theInterval);
    FunctionalIterable<ImmutableSegmentLoadInfo> retval = FunctionalIterable.create(lookup).transformCat(new Function<TimelineObjectHolder<String, SegmentLoadInfo>, Iterable<ImmutableSegmentLoadInfo>>() {

        @Override
        public Iterable<ImmutableSegmentLoadInfo> apply(TimelineObjectHolder<String, SegmentLoadInfo> input) {
            return Iterables.transform(input.getObject(), new Function<PartitionChunk<SegmentLoadInfo>, ImmutableSegmentLoadInfo>() {

                @Override
                public ImmutableSegmentLoadInfo apply(PartitionChunk<SegmentLoadInfo> chunk) {
                    return chunk.getObject().toImmutableSegmentLoadInfo();
                }
            });
        }
    });
    return Response.ok(retval).build();
}
Also used: FunctionalIterable (io.druid.java.util.common.guava.FunctionalIterable), ImmutableSegmentLoadInfo (io.druid.client.ImmutableSegmentLoadInfo), SegmentLoadInfo (io.druid.client.SegmentLoadInfo), Function (com.google.common.base.Function), TimelineObjectHolder (io.druid.timeline.TimelineObjectHolder), TableDataSource (io.druid.query.TableDataSource), PartitionChunk (io.druid.timeline.partition.PartitionChunk), Interval (org.joda.time.Interval), Path (javax.ws.rs.Path), ResourceFilters (com.sun.jersey.spi.container.ResourceFilters), Produces (javax.ws.rs.Produces), GET (javax.ws.rs.GET)
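
FunctionalIterable.transformCat is Druid's flat-map helper: each TimelineObjectHolder is mapped to an Iterable of ImmutableSegmentLoadInfo and the results are concatenated into one response body. A rough sketch of the same shape using only Guava (Iterables.concat over Iterables.transform), with nested string lists standing in for the timeline types; this is an illustrative equivalent under those assumptions, not the Druid implementation:

import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;

import java.util.List;

public class TransformCatSketch {
    public static void main(String[] args) {
        // Hypothetical stand-in: each "holder" carries several "chunks", mirroring
        // TimelineObjectHolder -> PartitionChunk in the resource method above.
        List<List<String>> holders = ImmutableList.<List<String>>of(
                ImmutableList.of("chunk-0", "chunk-1"),
                ImmutableList.of("chunk-2"));

        // transformCat is a flat-map: map each holder to an Iterable, then concatenate.
        // With plain Guava the same shape is Iterables.concat(Iterables.transform(...)).
        Iterable<String> flattened = Iterables.concat(
                Iterables.transform(holders, new Function<List<String>, Iterable<String>>() {
                    @Override
                    public Iterable<String> apply(List<String> holder) {
                        return Iterables.transform(holder, new Function<String, String>() {
                            @Override
                            public String apply(String chunk) {
                                return chunk.toUpperCase();
                            }
                        });
                    }
                }));

        // Evaluated lazily; copying forces it: [CHUNK-0, CHUNK-1, CHUNK-2]
        System.out.println(ImmutableList.copyOf(flattened));
    }
}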

Aggregations

Function (com.google.common.base.Function): 315
Nullable (javax.annotation.Nullable): 57
ArrayList (java.util.ArrayList): 51
List (java.util.List): 49
Test (org.junit.Test): 49
IOException (java.io.IOException): 48
Map (java.util.Map): 45
File (java.io.File): 29
HashMap (java.util.HashMap): 29
ImmutableList (com.google.common.collect.ImmutableList): 23
ImmutableMap (com.google.common.collect.ImmutableMap): 19
DateTime (org.joda.time.DateTime): 19
Optional (com.google.common.base.Optional): 18
ISE (io.druid.java.util.common.ISE): 14
Iterator (java.util.Iterator): 14
ImmutableSet (com.google.common.collect.ImmutableSet): 13
Result (io.druid.query.Result): 13
Feature (org.opengis.feature.Feature): 13
BuildTarget (com.facebook.buck.model.BuildTarget): 12
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 12