/**
 * Add a row. Must not be called concurrently from multiple threads.
 *
 * @param row               the row to add
 * @param sequenceName      sequenceName for this row's segment
 * @param committerSupplier supplier of a committer associated with all data that has been added, including this row
 * @return segment to which this row was added, or null if segment allocator returned null for this row
 * @throws IOException if there is an I/O error while allocating or writing to a segment
 */
public SegmentIdentifier add(final InputRow row, final String sequenceName, final Supplier<Committer> committerSupplier) throws IOException {
    Preconditions.checkNotNull(row, "row");
    Preconditions.checkNotNull(sequenceName, "sequenceName");
    Preconditions.checkNotNull(committerSupplier, "committerSupplier");
    final SegmentIdentifier identifier = getSegment(row.getTimestamp(), sequenceName);
    if (identifier != null) {
        try {
            final int numRows = appenderator.add(identifier, row, wrapCommitterSupplier(committerSupplier));
            if (numRows >= maxRowsPerSegment) {
                moveSegmentOut(sequenceName, ImmutableList.of(identifier));
        } catch (SegmentNotWritableException e) {
            throw new ISE(e, "WTF?! Segment[%s] not writable when it should have been.", identifier);
    return identifier;
/**
 * Push and publish all segments to the metadata store.
 *
 * @param publisher        segment publisher
 * @param wrappedCommitter wrapped committer (from wrapCommitter)
 * @return published segments and metadata, or null if segments could not be published due to transaction failure
 *         with commit metadata.
 */
private SegmentsAndMetadata publishAll(final TransactionalSegmentPublisher publisher, final Committer wrappedCommitter) throws InterruptedException {
    final List<SegmentIdentifier> theSegments = ImmutableList.copyOf(appenderator.getSegments());
    long nTry = 0;
    while (true) {
        try {
  "Pushing segments: [%s]", Joiner.on(", ").join(theSegments));
            final SegmentsAndMetadata segmentsAndMetadata = appenderator.push(theSegments, wrappedCommitter).get();
            // Sanity check
            if (!segmentsToIdentifiers(segmentsAndMetadata.getSegments()).equals(Sets.newHashSet(theSegments))) {
                throw new ISE("WTF?! Pushed different segments than requested. Pushed[%s], requested[%s].", Joiner.on(", ").join(identifiersToStrings(segmentsToIdentifiers(segmentsAndMetadata.getSegments()))), Joiner.on(", ").join(identifiersToStrings(theSegments)));
  "Publishing segments with commitMetadata[%s]: [%s]", segmentsAndMetadata.getCommitMetadata(), Joiner.on(", ").join(segmentsAndMetadata.getSegments()));
            if (segmentsAndMetadata.getSegments().isEmpty()) {
      "Nothing to publish, skipping publish step.");
            } else {
                final boolean published = publisher.publishSegments(ImmutableSet.copyOf(segmentsAndMetadata.getSegments()), ((FiniteAppenderatorDriverMetadata) segmentsAndMetadata.getCommitMetadata()).getCallerMetadata());
                if (published) {
          "Published segments, awaiting handoff.");
                } else {
          "Transaction failure while publishing segments, checking if someone else beat us to it.");
                    if (usedSegmentChecker.findUsedSegments(segmentsToIdentifiers(segmentsAndMetadata.getSegments())).equals(Sets.newHashSet(segmentsAndMetadata.getSegments()))) {
              "Our segments really do exist, awaiting handoff.");
                    } else {
                        log.warn("Our segments don't exist, giving up.");
                        return null;
            for (final DataSegment dataSegment : segmentsAndMetadata.getSegments()) {
                handoffNotifier.registerSegmentHandoffCallback(new SegmentDescriptor(dataSegment.getInterval(), dataSegment.getVersion(), dataSegment.getShardSpec().getPartitionNum()), MoreExecutors.sameThreadExecutor(), new Runnable() {

                    public void run() {
                        final SegmentIdentifier identifier = SegmentIdentifier.fromDataSegment(dataSegment);
              "Segment[%s] successfully handed off, dropping.", identifier);
                        final ListenableFuture<?> dropFuture = appenderator.drop(identifier);
                        Futures.addCallback(dropFuture, new FutureCallback<Object>() {

                            public void onSuccess(Object result) {
                                synchronized (handoffMonitor) {

                            public void onFailure(Throwable e) {
                                log.warn(e, "Failed to drop segment[%s]?!");
                                synchronized (handoffMonitor) {
            return segmentsAndMetadata;
        } catch (InterruptedException e) {
            throw e;
        } catch (Exception e) {
            final long sleepMillis = computeNextRetrySleep(++nTry);
            log.warn(e, "Failed publishAll (try %d), retrying in %,dms.", nTry, sleepMillis);
/**
 * Populate "sinks" and "sinkTimeline" with committed segments, and announce them with the segmentAnnouncer.
 *
 * @return persisted commit metadata
 */
private Object bootstrapSinksFromDisk() {
    // Rebuilds "sinks" and "sinkTimeline" from the directories under the base persist directory and
    // returns committed.getMetadata().
    // NOTE(review): this method text has lost its closing braces and several statements (flagged below);
    // restore them from the authoritative source before compiling.
    Preconditions.checkState(sinks.isEmpty(), "Already bootstrapped?!");
    final File baseDir = tuningConfig.getBasePersistDirectory();
    // Nothing to load when the base directory is absent.
    if (!baseDir.exists()) {
        return null;
    // listFiles() yields null when the path cannot be listed; treated the same as "nothing to load".
    final File[] files = baseDir.listFiles();
    if (files == null) {
        return null;
    final File commitFile = computeCommitFile();
    final Committed committed;
    try {
        // A missing commit file falls back to Committed.nil().
        if (commitFile.exists()) {
            committed = objectMapper.readValue(commitFile, Committed.class);
        } else {
            committed = Committed.nil();
    } catch (Exception e) {
        throw new ISE(e, "Failed to read commitFile: %s", commitFile);
    // NOTE(review): the call prefix before the next format string is missing - presumably a logging call; confirm.
    }"Loading sinks from[%s]: %s", baseDir, committed.getHydrants().keySet());
    for (File sinkDir : files) {
        final File identifierFile = new File(sinkDir, IDENTIFIER_FILE_NAME);
        if (!identifierFile.isFile()) {
            // No identifier in this sinkDir; it must not actually be a sink directory. Skip it.
            // NOTE(review): no skip statement is visible here - presumably a `continue;` was lost; confirm.
        try {
            // NOTE(review): reads the literal "identifier.json" although identifierFile above is built from
            // IDENTIFIER_FILE_NAME - confirm the two names agree.
            final SegmentIdentifier identifier = objectMapper.readValue(new File(sinkDir, "identifier.json"), SegmentIdentifier.class);
            final int committedHydrants = committed.getCommittedHydrants(identifier.getIdentifierAsString());
            if (committedHydrants <= 0) {
                // NOTE(review): only the log arguments survive; the removal of sinkDir and the skip to the
                // next directory are not visible - confirm.
      "Removing uncommitted sink at [%s]", sinkDir);
            // To avoid reading and listing of "merged" dir and other special files
            final File[] sinkFiles = sinkDir.listFiles(new FilenameFilter() {

                public boolean accept(File dir, String fileName) {
                    // Keep only entries whose names parse as integers.
                    return !(Ints.tryParse(fileName) == null);
            // Order the numbered entries by their integer names.
            Arrays.sort(sinkFiles, new Comparator<File>() {

                public int compare(File o1, File o2) {
                    // NOTE(review): the comparison call and its first operand are truncated - presumably an
                    // integer compare of o1's parsed name against o2's; confirm.
                    return, Integer.parseInt(o2.getName()));
            List<FireHydrant> hydrants = Lists.newArrayList();
            for (File hydrantDir : sinkFiles) {
                final int hydrantNumber = Integer.parseInt(hydrantDir.getName());
                if (hydrantNumber >= committedHydrants) {
                    // NOTE(review): only the log arguments survive; the removal of hydrantDir is not visible.
          "Removing uncommitted segment at [%s]", hydrantDir);
                } else {
          "Loading previously persisted segment at [%s]", hydrantDir);
                    // Hydrant numbers must be contiguous from zero: the next number equals the count loaded so far.
                    if (hydrantNumber != hydrants.size()) {
                        throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
                    hydrants.add(new FireHydrant(new QueryableIndexSegment(identifier.getIdentifierAsString(), indexIO.loadIndex(hydrantDir)), hydrantNumber));
            // Make sure we loaded enough hydrants.
            if (committedHydrants != hydrants.size()) {
                throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
            Sink currSink = new Sink(identifier.getInterval(), schema, identifier.getShardSpec(), identifier.getVersion(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions(), hydrants);
            sinks.put(identifier, currSink);
            sinkTimeline.add(currSink.getInterval(), currSink.getVersion(), identifier.getShardSpec().createChunk(currSink));
            // NOTE(review): the method description mentions announcing segments, but no announcer call is
            // visible in this text - confirm whether one was lost.
        } catch (IOException e) {
            // An I/O failure for one sink directory raises an alert (with sinkDir attached) instead of throwing.
            // NOTE(review): the sink[%s] placeholder is filled with the data source name, not a sink id.
            log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource()).addData("sinkDir", sinkDir).emit();
    // Make sure we loaded all committed sinks.
    final Set<String> loadedSinks = Sets.newHashSet(Iterables.transform(sinks.keySet(), new Function<SegmentIdentifier, String>() {

        public String apply(SegmentIdentifier input) {
            return input.getIdentifierAsString();
    // Any committed identifier that was not loaded above is a fatal inconsistency.
    final Set<String> missingSinks = Sets.difference(committed.getHydrants().keySet(), loadedSinks);
    if (!missingSinks.isEmpty()) {
        throw new ISE("Missing committed sinks [%s]", Joiner.on(", ").join(missingSinks));
    return committed.getMetadata();
/**
 * Merge segment, push to deep storage. Should only be used on segments that have been fully persisted. Must only
 * be run in the single-threaded pushExecutor.
 *
 * @param identifier sink identifier
 * @param sink       sink to push
 * @return segment descriptor, or null if the sink is no longer valid
 */
private DataSegment mergeAndPush(final SegmentIdentifier identifier, final Sink sink) {
    // Merges the sink's hydrant indexes into one index, pushes it via dataSegmentPusher, and records the
    // resulting DataSegment in a descriptor file.
    // NOTE(review): this method text has lost its closing braces and several statements (flagged below);
    // restore them from the authoritative source before compiling.
    // Bail out if this sink is null or otherwise not what we expect.
    // The comparison is by reference: the registered sink must be this exact instance.
    if (sinks.get(identifier) != sink) {
        log.warn("Sink for segment[%s] no longer valid, bailing out of mergeAndPush.", identifier);
        return null;
    // Use a descriptor file to indicate that pushing has completed.
    final File persistDir = computePersistDir(identifier);
    final File mergedTarget = new File(persistDir, "merged");
    final File descriptorFile = computeDescriptorFile(identifier);
    // Sanity checks
    for (FireHydrant hydrant : sink) {
        // The writability check sits inside the hydrant loop, so it is only evaluated when the sink
        // yields at least one hydrant.
        if (sink.isWritable()) {
            throw new ISE("WTF?! Expected sink to be no longer writable before mergeAndPush. Segment[%s].", identifier);
        synchronized (hydrant) {
            if (!hydrant.hasSwapped()) {
                throw new ISE("WTF?! Expected sink to be fully persisted before mergeAndPush. Segment[%s].", identifier);
    try {
        if (descriptorFile.exists()) {
            // Already pushed.
            // NOTE(review): the call prefix before the next format string is missing - presumably a logging call.
  "Segment[%s] already pushed.", identifier);
            // Reuse the descriptor written by the earlier push instead of pushing again.
            return objectMapper.readValue(descriptorFile, DataSegment.class);
        }"Pushing merged index for segment[%s].", identifier);
        // NOTE(review): the message below says "after removing", but no removal of mergedTarget is visible
        // before this check - a statement appears to have been lost; confirm.
        if (mergedTarget.exists()) {
            throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
        List<QueryableIndex> indexes = Lists.newArrayList();
        for (FireHydrant fireHydrant : sink) {
            Segment segment = fireHydrant.getSegment();
            final QueryableIndex queryableIndex = segment.asQueryableIndex();
            // NOTE(review): queryableIndex is never added to indexes in the visible text - presumably an
            // indexes.add(queryableIndex) was lost; confirm.
  "Adding hydrant[%s]", fireHydrant);
        final File mergedFile;
        mergedFile = indexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), mergedTarget, tuningConfig.getIndexSpec());
        // Load the merged index back so the pushed segment carries the dimensions actually present in it.
        QueryableIndex index = indexIO.loadIndex(mergedFile);
        DataSegment segment = dataSegmentPusher.push(mergedFile, sink.getSegment().withDimensions(Lists.newArrayList(index.getAvailableDimensions())));
        // Write the descriptor last: its existence is what the "Already pushed" branch above keys on.
        objectMapper.writeValue(descriptorFile, segment);"Pushed merged index for segment[%s], descriptor is: %s", identifier, segment);
        return segment;
    } catch (Exception e) {
        // Log with the segment id, then rethrow via Throwables.propagate.
        log.warn(e, "Failed to push merged index for segment[%s].", identifier);
        throw Throwables.propagate(e);
public static void addNextRow(final Supplier<Committer> committerSupplier, final Firehose firehose, final Plumber plumber, final boolean reportParseExceptions, final FireDepartmentMetrics metrics) {
    // Reads one row from the firehose and adds it to the plumber. With reportParseExceptions set, parse
    // failures and null rows are thrown as ParseException; otherwise they are logged at debug level.
    // NOTE(review): this method text has lost its closing braces and some statements (flagged below);
    // the `metrics` parameter is never referenced in the visible text, so counter updates were presumably lost.
    final InputRow inputRow;
    try {
        inputRow = firehose.nextRow();
    } catch (ParseException e) {
        if (reportParseExceptions) {
            throw e;
        } else {
            // NOTE(review): no exit follows this log in the visible text, yet the final inputRow would be
            // unassigned past this point - presumably a `return;` was lost; confirm.
            log.debug(e, "Discarded row due to exception, considering unparseable.");
    if (inputRow == null) {
        if (reportParseExceptions) {
            throw new ParseException("null input row");
        } else {
            // NOTE(review): as above, no exit is visible after this log; confirm.
            log.debug("Discarded null input row, considering unparseable.");
    final int numRows;
    try {
        numRows = plumber.add(inputRow, committerSupplier);
    } catch (IndexSizeExceededException e) {
        // plumber.add should be swapping out indexes before they fill up.
        throw new ISE(e, "WTF?! Index size exceeded, this shouldn't happen. Bad Plumber!");
    // A result of -1 from plumber.add is logged as a thrown-away row.
    if (numRows == -1) {
        log.debug("Discarded row[%s], considering thrownAway.", inputRow);
// Also used: InputRow, ISE, ParseException, IndexSizeExceededException (io.druid.segment.incremental.IndexSizeExceededException)


