Use of io.druid.data.input.Committer in project druid by druid-io.
The class RealtimeIndexTask, method run().
@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
  runThread = Thread.currentThread();
  if (this.plumber != null) {
    throw new IllegalStateException("WTF?!? run with non-null plumber??!");
  }
  boolean normalExit = true;
  // It would be nice to get the PlumberSchool in the constructor. Although that will need jackson injectables for
  // stuff like the ServerView, which seems kind of odd? Perhaps revisit this when Guice has been introduced.
  final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(this, toolbox);
  // NOTE: We talk to the coordinator in various places in the plumber and we could be more robust to issues
  // with the coordinator. Right now, we'll block/throw in whatever thread triggered the coordinator behavior,
  // which will typically be either the main data processing loop or the persist thread.
  // Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
  final DataSegmentAnnouncer lockingSegmentAnnouncer = new DataSegmentAnnouncer() {
    @Override
    public void announceSegment(final DataSegment segment) throws IOException {
      // Side effect: Calling announceSegment causes a lock to be acquired
      toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
      toolbox.getSegmentAnnouncer().announceSegment(segment);
    }

    @Override
    public void unannounceSegment(final DataSegment segment) throws IOException {
      try {
        toolbox.getSegmentAnnouncer().unannounceSegment(segment);
      } finally {
        toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
      }
    }

    @Override
    public void announceSegments(Iterable<DataSegment> segments) throws IOException {
      // Side effect: Calling announceSegments causes locks to be acquired
      for (DataSegment segment : segments) {
        toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
      }
      toolbox.getSegmentAnnouncer().announceSegments(segments);
    }

    @Override
    public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
      try {
        toolbox.getSegmentAnnouncer().unannounceSegments(segments);
      } finally {
        for (DataSegment segment : segments) {
          toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
        }
      }
    }

    @Override
    public boolean isAnnounced(DataSegment segment) {
      return toolbox.getSegmentAnnouncer().isAnnounced(segment);
    }
  };
  // NOTE: getVersion will block if there is lock contention, which will block plumber.getSink
  // NOTE: (and thus the firehose)
  // Shouldn't usually happen, since we don't expect people to submit tasks that intersect with the
  // realtime window, but if they do it can be problematic. If we decide to care, we can use more threads in
  // the plumber such that waiting for the coordinator doesn't block data processing.
  final VersioningPolicy versioningPolicy = new VersioningPolicy() {
    @Override
    public String getVersion(final Interval interval) {
      try {
        // Side effect: Calling getVersion causes a lock to be acquired
        final TaskLock myLock = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));
        return myLock.getVersion();
      } catch (IOException e) {
        throw Throwables.propagate(e);
      }
    }
  };
  DataSchema dataSchema = spec.getDataSchema();
  RealtimeIOConfig realtimeIOConfig = spec.getIOConfig();
  RealtimeTuningConfig tuningConfig = spec.getTuningConfig()
      .withBasePersistDirectory(new File(toolbox.getTaskWorkDir(), "persist"))
      .withVersioningPolicy(versioningPolicy);
  final FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, tuningConfig);
  this.metrics = fireDepartment.getMetrics();
  final RealtimeMetricsMonitor metricsMonitor = new RealtimeMetricsMonitor(
      ImmutableList.of(fireDepartment),
      ImmutableMap.of(DruidMetrics.TASK_ID, new String[] { getId() })
  );
  this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();
  // NOTE: This pusher selects path based purely on global configuration and the DataSegment, which means
  // NOTE: that redundant realtime tasks will upload to the same location. This can cause index.zip
  // NOTE: (partitionNum_index.zip for HDFS data storage) and descriptor.json (partitionNum_descriptor.json for
  // NOTE: HDFS data storage) to mismatch, or it can cause historical nodes to load different instances of
  // NOTE: the "same" segment.
  final PlumberSchool plumberSchool = new RealtimePlumberSchool(
      toolbox.getEmitter(),
      toolbox.getQueryRunnerFactoryConglomerate(),
      toolbox.getSegmentPusher(),
      lockingSegmentAnnouncer,
      segmentPublisher,
      toolbox.getSegmentHandoffNotifierFactory(),
      toolbox.getQueryExecutorService(),
      toolbox.getIndexMerger(),
      toolbox.getIndexMergerV9(),
      toolbox.getIndexIO(),
      toolbox.getCache(),
      toolbox.getCacheConfig(),
      toolbox.getObjectMapper()
  );
  this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, metrics);
  Supplier<Committer> committerSupplier = null;
  try {
    plumber.startJob();
    // Set up metrics emission
    toolbox.getMonitorScheduler().addMonitor(metricsMonitor);
    // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
    final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
    final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
    // Skip connecting firehose if we've been stopped before we got started.
    synchronized (this) {
      if (!gracefullyStopped) {
        firehose = firehoseFactory.connect(spec.getDataSchema().getParser());
        committerSupplier = Committers.supplierFromFirehose(firehose);
      }
    }
    // Time to read data!
    while (firehose != null && (!gracefullyStopped || firehoseDrainableByClosing) && firehose.hasMore()) {
      Plumbers.addNextRow(committerSupplier, firehose, plumber, tuningConfig.isReportParseExceptions(), metrics);
    }
  } catch (Throwable e) {
    normalExit = false;
    log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
    throw e;
  } finally {
    if (normalExit) {
      try {
        // Persist if we had actually started.
        if (firehose != null) {
          log.info("Persisting remaining data.");
          final Committer committer = committerSupplier.get();
          final CountDownLatch persistLatch = new CountDownLatch(1);
          plumber.persist(new Committer() {
            @Override
            public Object getMetadata() {
              return committer.getMetadata();
            }

            @Override
            public void run() {
              try {
                committer.run();
              } finally {
                persistLatch.countDown();
              }
            }
          });
          persistLatch.await();
        }
        if (gracefullyStopped) {
          log.info("Gracefully stopping.");
        } else {
          log.info("Finishing the job.");
          synchronized (this) {
            if (gracefullyStopped) {
              // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
              log.info("Gracefully stopping.");
            } else {
              finishingJob = true;
            }
          }
          if (finishingJob) {
            plumber.finishJob();
          }
        }
      } catch (InterruptedException e) {
        log.debug(e, "Interrupted while finishing the job");
      } catch (Exception e) {
        log.makeAlert(e, "Failed to finish realtime task").emit();
        throw e;
      } finally {
        if (firehose != null) {
          CloseQuietly.close(firehose);
        }
        toolbox.getMonitorScheduler().removeMonitor(metricsMonitor);
      }
    }
  }
  log.info("Job done!");
  return TaskStatus.success(getId());
}
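The finally block above builds an anonymous Committer that forwards getMetadata() and run() to the firehose's committer and releases a CountDownLatch once the commit callback has finished, so the task can wait for the final persist to complete. Below is a minimal sketch of that wrap-and-signal pattern as a standalone helper; the class and method names (LatchedCommitters, latchedCommitter) are ours for illustration and are not part of Druid.

import java.util.concurrent.CountDownLatch;
import io.druid.data.input.Committer;

// Illustrative helper (not part of Druid): wrap a delegate Committer so that a latch is
// released after the delegate's commit callback has run, even if it throws.
public final class LatchedCommitters
{
  private LatchedCommitters() {}

  public static Committer latchedCommitter(final Committer delegate, final CountDownLatch latch)
  {
    return new Committer()
    {
      @Override
      public Object getMetadata()
      {
        // Pass the delegate's commit metadata through unchanged.
        return delegate.getMetadata();
      }

      @Override
      public void run()
      {
        try {
          delegate.run();
        } finally {
          latch.countDown();
        }
      }
    };
  }
}

With such a helper, the persist-and-wait step above could read plumber.persist(LatchedCommitters.latchedCommitter(committerSupplier.get(), persistLatch)); persistLatch.await();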
Use of io.druid.data.input.Committer in project druid by druid-io.
The class YeOldePlumberSchool, method findPlumber().
@Override
public Plumber findPlumber(final DataSchema schema, final RealtimeTuningConfig config, final FireDepartmentMetrics metrics) {
  // There can be only one.
  final Sink theSink = new Sink(
      interval,
      schema,
      config.getShardSpec(),
      version,
      config.getMaxRowsInMemory(),
      config.isReportParseExceptions()
  );
  // Temporary directory to hold spilled segments.
  final File persistDir = new File(tmpSegmentDir, theSink.getSegment().getIdentifier());
  // Set of spilled segments. Will be merged at the end.
  final Set<File> spilled = Sets.newHashSet();
  // IndexMerger implementation.
  final IndexMerger theIndexMerger = config.getBuildV9Directly() ? indexMergerV9 : indexMerger;
  return new Plumber() {
    @Override
    public Object startJob() {
      return null;
    }

    @Override
    public int add(InputRow row, Supplier<Committer> committerSupplier) throws IndexSizeExceededException {
      Sink sink = getSink(row.getTimestampFromEpoch());
      if (sink == null) {
        return -1;
      }
      final int numRows = sink.add(row);
      if (!sink.canAppendRow()) {
        persist(committerSupplier.get());
      }
      return numRows;
    }

    private Sink getSink(long timestamp) {
      if (theSink.getInterval().contains(timestamp)) {
        return theSink;
      } else {
        return null;
      }
    }

    @Override
    public <T> QueryRunner<T> getQueryRunner(Query<T> query) {
      throw new UnsupportedOperationException("Don't query me, bro.");
    }

    @Override
    public void persist(Committer committer) {
      spillIfSwappable();
      committer.run();
    }

    @Override
    public void finishJob() {
      // The segment we will upload
      File fileToUpload = null;
      try {
        // User should have persisted everything by now.
        Preconditions.checkState(!theSink.swappable(), "All data must be persisted before finishing the job!");
        if (spilled.size() == 0) {
          throw new IllegalStateException("Nothing indexed?");
        } else if (spilled.size() == 1) {
          fileToUpload = Iterables.getOnlyElement(spilled);
        } else {
          List<QueryableIndex> indexes = Lists.newArrayList();
          for (final File oneSpill : spilled) {
            indexes.add(indexIO.loadIndex(oneSpill));
          }
          fileToUpload = new File(tmpSegmentDir, "merged");
          theIndexMerger.mergeQueryableIndex(
              indexes,
              schema.getGranularitySpec().isRollup(),
              schema.getAggregators(),
              fileToUpload,
              config.getIndexSpec()
          );
        }
        // Map merged segment so we can extract dimensions
        final QueryableIndex mappedSegment = indexIO.loadIndex(fileToUpload);
        final DataSegment segmentToUpload = theSink.getSegment()
            .withDimensions(ImmutableList.copyOf(mappedSegment.getAvailableDimensions()))
            .withBinaryVersion(SegmentUtils.getVersionFromDir(fileToUpload));
        dataSegmentPusher.push(fileToUpload, segmentToUpload);
        log.info("Uploaded segment[%s]", segmentToUpload.getIdentifier());
      } catch (Exception e) {
        log.warn(e, "Failed to merge and upload");
        throw Throwables.propagate(e);
      } finally {
        try {
          if (fileToUpload != null) {
            log.info("Deleting Index File[%s]", fileToUpload);
            FileUtils.deleteDirectory(fileToUpload);
          }
        } catch (IOException e) {
          log.warn(e, "Error deleting directory[%s]", fileToUpload);
        }
      }
    }

    private void spillIfSwappable() {
      if (theSink.swappable()) {
        final FireHydrant indexToPersist = theSink.swap();
        final int rowsToPersist = indexToPersist.getIndex().size();
        final File dirToPersist = getSpillDir(indexToPersist.getCount());
        log.info("Spilling index[%d] with rows[%d] to: %s", indexToPersist.getCount(), rowsToPersist, dirToPersist);
        try {
          theIndexMerger.persist(indexToPersist.getIndex(), dirToPersist, config.getIndexSpec());
          indexToPersist.swapSegment(null);
          metrics.incrementRowOutputCount(rowsToPersist);
          spilled.add(dirToPersist);
        } catch (Exception e) {
          log.warn(e, "Failed to spill index[%d]", indexToPersist.getCount());
          throw Throwables.propagate(e);
        }
      }
    }

    private File getSpillDir(final int n) {
      return new File(persistDir, String.format("spill%d", n));
    }
  };
}
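For context, a caller drives the Plumber returned by findPlumber() with the usual startJob / add / persist / finishJob sequence, handing each add() a Supplier<Committer> so the plumber can commit whenever it decides to spill. A minimal sketch of such a driver follows; the method name drive(), the rows iterable, and the no-op Committer are illustrative placeholders rather than Druid code (a real caller would typically use something like Committers.supplierFromFirehose, as in the task above), and imports are omitted to match the snippets on this page.

// Illustrative driver (not part of Druid): feed rows to a Plumber obtained from
// YeOldePlumberSchool.findPlumber(...) and commit via a placeholder no-op Committer.
void drive(Plumber plumber, Iterable<InputRow> rows) throws IndexSizeExceededException
{
  final Committer noopCommitter = new Committer()
  {
    @Override
    public Object getMetadata()
    {
      return null; // nothing to checkpoint in this sketch
    }

    @Override
    public void run()
    {
      // no-op commit callback
    }
  };
  final Supplier<Committer> committerSupplier = Suppliers.ofInstance(noopCommitter);

  plumber.startJob();
  for (InputRow row : rows) {
    // add() returns -1 when the row falls outside the sink's interval and is dropped.
    plumber.add(row, committerSupplier);
  }
  plumber.persist(committerSupplier.get()); // spill any in-memory data, then run the commit callback
  plumber.finishJob(); // merge the spills and push the final segment
}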
Use of io.druid.data.input.Committer in project druid by druid-io.
The class RealtimePlumberSchoolTest, method testPersist().
private void testPersist(final Object commitMetadata) throws Exception {
  plumber.getSinks().put(
      0L,
      new Sink(
          new Interval(0, TimeUnit.HOURS.toMillis(1)),
          schema,
          tuningConfig.getShardSpec(),
          new DateTime("2014-12-01T12:34:56.789").toString(),
          tuningConfig.getMaxRowsInMemory(),
          tuningConfig.isReportParseExceptions()
      )
  );
  Assert.assertNull(plumber.startJob());
  final InputRow row = EasyMock.createNiceMock(InputRow.class);
  EasyMock.expect(row.getTimestampFromEpoch()).andReturn(0L);
  EasyMock.expect(row.getDimensions()).andReturn(new ArrayList<String>());
  EasyMock.replay(row);
  final CountDownLatch doneSignal = new CountDownLatch(1);
  final Committer committer = new Committer() {
    @Override
    public Object getMetadata() {
      return commitMetadata;
    }

    @Override
    public void run() {
      doneSignal.countDown();
    }
  };
  plumber.add(row, Suppliers.ofInstance(committer));
  plumber.persist(committer);
  doneSignal.await();
  plumber.getSinks().clear();
  plumber.finishJob();
}
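The same anonymous-Committer-plus-latch pattern recurs in the next test as well; a small test helper like the sketch below could factor it out. The class name RecordingCommitter is ours for illustration and is not part of Druid's test code.

// Illustrative test helper (not part of Druid): a Committer that carries fixed commit
// metadata and lets a test wait until the commit callback has actually run.
class RecordingCommitter implements Committer
{
  private final Object metadata;
  private final CountDownLatch committed = new CountDownLatch(1);

  RecordingCommitter(Object metadata)
  {
    this.metadata = metadata;
  }

  @Override
  public Object getMetadata()
  {
    return metadata;
  }

  @Override
  public void run()
  {
    committed.countDown(); // signal that the plumber invoked the commit callback
  }

  void awaitCommit() throws InterruptedException
  {
    committed.await();
  }
}

A test could then call plumber.persist(committer) followed by committer.awaitCommit() instead of managing the CountDownLatch by hand.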
Use of io.druid.data.input.Committer in project druid by druid-io.
The class RealtimePlumberSchoolTest, method testPersistHydrantGapsHelper().
private void testPersistHydrantGapsHelper(final Object commitMetadata) throws Exception {
  Interval testInterval = new Interval(new DateTime("1970-01-01"), new DateTime("1971-01-01"));
  RealtimePlumber plumber2 = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
  plumber2.getSinks().put(
      0L,
      new Sink(
          testInterval,
          schema2,
          tuningConfig.getShardSpec(),
          new DateTime("2014-12-01T12:34:56.789").toString(),
          tuningConfig.getMaxRowsInMemory(),
          tuningConfig.isReportParseExceptions()
      )
  );
  Assert.assertNull(plumber2.startJob());
  final CountDownLatch doneSignal = new CountDownLatch(1);
  final Committer committer = new Committer() {
    @Override
    public Object getMetadata() {
      return commitMetadata;
    }

    @Override
    public void run() {
      doneSignal.countDown();
    }
  };
  plumber2.add(getTestInputRow("1970-01-01"), Suppliers.ofInstance(committer));
  plumber2.add(getTestInputRow("1970-02-01"), Suppliers.ofInstance(committer));
  plumber2.add(getTestInputRow("1970-03-01"), Suppliers.ofInstance(committer));
  plumber2.add(getTestInputRow("1970-04-01"), Suppliers.ofInstance(committer));
  plumber2.add(getTestInputRow("1970-05-01"), Suppliers.ofInstance(committer));
  plumber2.persist(committer);
  doneSignal.await();
  plumber2.getSinks().clear();
  plumber2.finishJob();
  File persistDir = plumber2.computePersistDir(schema2, testInterval);

  /* Check that all hydrants were persisted */
  for (int i = 0; i < 5; i++) {
    Assert.assertTrue(new File(persistDir, String.valueOf(i)).exists());
  }

  /* Create some gaps in the persisted hydrants and reload */
  FileUtils.deleteDirectory(new File(persistDir, "1"));
  FileUtils.deleteDirectory(new File(persistDir, "3"));
  RealtimePlumber restoredPlumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
  restoredPlumber.bootstrapSinksFromDisk();
  Map<Long, Sink> sinks = restoredPlumber.getSinks();
  Assert.assertEquals(1, sinks.size());
  List<FireHydrant> hydrants = Lists.newArrayList(sinks.get(new Long(0)));
  DateTime startTime = new DateTime("1970-01-01T00:00:00.000Z");
  Interval expectedInterval = new Interval(startTime, new DateTime("1971-01-01T00:00:00.000Z"));
  Assert.assertEquals(0, hydrants.get(0).getCount());
  Assert.assertEquals(expectedInterval, hydrants.get(0).getSegment().getDataInterval());
  Assert.assertEquals(2, hydrants.get(1).getCount());
  Assert.assertEquals(expectedInterval, hydrants.get(1).getSegment().getDataInterval());
  Assert.assertEquals(4, hydrants.get(2).getCount());
  Assert.assertEquals(expectedInterval, hydrants.get(2).getSegment().getDataInterval());

  /* Delete all the hydrants and reload, no sink should be created */
  FileUtils.deleteDirectory(new File(persistDir, "0"));
  FileUtils.deleteDirectory(new File(persistDir, "2"));
  FileUtils.deleteDirectory(new File(persistDir, "4"));
  RealtimePlumber restoredPlumber2 = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
  restoredPlumber2.bootstrapSinksFromDisk();
  Assert.assertEquals(0, restoredPlumber2.getSinks().size());
}
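The test above relies on the on-disk layout of RealtimePlumber persists: computePersistDir(schema, interval) holds one numbered subdirectory per persisted hydrant ("0", "1", ...), and bootstrapSinksFromDisk() rebuilds sinks from whichever numbered directories remain. A minimal sketch of inspecting that layout is below; the helper name persistedHydrantCounts is ours for illustration, not Druid code.

// Illustrative helper (not part of Druid): list the hydrant counts that survive on disk
// under a sink's persist directory, where each persisted hydrant lives in a
// subdirectory named by its count.
static List<Integer> persistedHydrantCounts(File persistDir)
{
  final List<Integer> counts = new ArrayList<>();
  final File[] children = persistDir.listFiles();
  if (children != null) {
    for (File child : children) {
      if (!child.isDirectory()) {
        continue;
      }
      try {
        counts.add(Integer.parseInt(child.getName()));
      } catch (NumberFormatException ignored) {
        // not a hydrant directory; skip it
      }
    }
  }
  Collections.sort(counts);
  return counts;
}

After the two deletes in the middle of the test, persistedHydrantCounts(persistDir) would return [0, 2, 4], matching the hydrant counts the test asserts on after bootstrapSinksFromDisk().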