Search in sources :

Example 1 with FirehoseFactory

use of io.druid.data.input.FirehoseFactory in project druid by druid-io.

the class RealtimeManagerTest method setUp.

@Before
public void setUp() throws Exception {
    final List<TestInputRowHolder> rows = Arrays.asList(makeRow(new DateTime("9000-01-01").getMillis()), makeRow(new ParseException("parse error")), null, makeRow(new DateTime().getMillis()));
    ObjectMapper jsonMapper = new DefaultObjectMapper();
    schema = new DataSchema("test", null, new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper);
    schema2 = new DataSchema("testV2", null, new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper);
    RealtimeIOConfig ioConfig = new RealtimeIOConfig(new FirehoseFactory() {

        @Override
        public Firehose connect(InputRowParser parser) throws IOException {
            return new TestFirehose(rows.iterator());
        }
    }, new PlumberSchool() {

        @Override
        public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
            return plumber;
        }
    }, null);
    RealtimeIOConfig ioConfig2 = new RealtimeIOConfig(null, new PlumberSchool() {

        @Override
        public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
            return plumber2;
        }
    }, new FirehoseFactoryV2() {

        @Override
        public FirehoseV2 connect(InputRowParser parser, Object arg1) throws IOException, ParseException {
            return new TestFirehoseV2(rows.iterator());
        }
    });
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(1, new Period("P1Y"), null, null, null, null, null, null, null, null, 0, 0, null, null);
    plumber = new TestPlumber(new Sink(new Interval("0/P5000Y"), schema, tuningConfig.getShardSpec(), new DateTime().toString(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions()));
    realtimeManager = new RealtimeManager(Arrays.<FireDepartment>asList(new FireDepartment(schema, ioConfig, tuningConfig)), null);
    plumber2 = new TestPlumber(new Sink(new Interval("0/P5000Y"), schema2, tuningConfig.getShardSpec(), new DateTime().toString(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions()));
    realtimeManager2 = new RealtimeManager(Arrays.<FireDepartment>asList(new FireDepartment(schema2, ioConfig2, tuningConfig)), null);
    tuningConfig_0 = new RealtimeTuningConfig(1, new Period("P1Y"), null, null, null, null, null, new LinearShardSpec(0), null, null, 0, 0, null, null);
    tuningConfig_1 = new RealtimeTuningConfig(1, new Period("P1Y"), null, null, null, null, null, new LinearShardSpec(1), null, null, 0, 0, null, null);
    schema3 = new DataSchema("testing", null, new AggregatorFactory[] { new CountAggregatorFactory("ignore") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper);
    FireDepartment department_0 = new FireDepartment(schema3, ioConfig, tuningConfig_0);
    FireDepartment department_1 = new FireDepartment(schema3, ioConfig2, tuningConfig_1);
    QueryRunnerFactoryConglomerate conglomerate = new QueryRunnerFactoryConglomerate() {

        @Override
        public <T, QueryType extends Query<T>> QueryRunnerFactory<T, QueryType> findFactory(QueryType query) {
            return factory;
        }
    };
    chiefStartedLatch = new CountDownLatch(2);
    RealtimeManager.FireChief fireChief_0 = new RealtimeManager.FireChief(department_0, conglomerate) {

        @Override
        public void run() {
            super.initPlumber();
            chiefStartedLatch.countDown();
        }
    };
    RealtimeManager.FireChief fireChief_1 = new RealtimeManager.FireChief(department_1, conglomerate) {

        @Override
        public void run() {
            super.initPlumber();
            chiefStartedLatch.countDown();
        }
    };
    realtimeManager3 = new RealtimeManager(Arrays.asList(department_0, department_1), conglomerate, ImmutableMap.<String, Map<Integer, RealtimeManager.FireChief>>of("testing", ImmutableMap.of(0, fireChief_0, 1, fireChief_1)));
    startFireChiefWithPartitionNum(fireChief_0, 0);
    startFireChiefWithPartitionNum(fireChief_1, 1);
}
Also used : FirehoseV2(io.druid.data.input.FirehoseV2) RealtimeIOConfig(io.druid.segment.indexing.RealtimeIOConfig) BaseQuery(io.druid.query.BaseQuery) Query(io.druid.query.Query) GroupByQuery(io.druid.query.groupby.GroupByQuery) FirehoseFactory(io.druid.data.input.FirehoseFactory) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) DateTime(org.joda.time.DateTime) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) QueryRunnerFactoryConglomerate(io.druid.query.QueryRunnerFactoryConglomerate) Sink(io.druid.segment.realtime.plumber.Sink) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Firehose(io.druid.data.input.Firehose) Period(org.joda.time.Period) IOException(java.io.IOException) PlumberSchool(io.druid.segment.realtime.plumber.PlumberSchool) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) RealtimeTuningConfig(io.druid.segment.indexing.RealtimeTuningConfig) CountDownLatch(java.util.concurrent.CountDownLatch) DataSchema(io.druid.segment.indexing.DataSchema) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) Plumber(io.druid.segment.realtime.plumber.Plumber) ParseException(io.druid.java.util.common.parsers.ParseException) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) InputRowParser(io.druid.data.input.impl.InputRowParser) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) FirehoseFactoryV2(io.druid.data.input.FirehoseFactoryV2) Interval(org.joda.time.Interval) Before(org.junit.Before)

Example 2 with FirehoseFactory

use of io.druid.data.input.FirehoseFactory in project druid by druid-io.

the class CombiningFirehoseFactoryTest method testCombiningfirehose.

@Test
public void testCombiningfirehose() throws IOException {
    List<InputRow> list1 = Arrays.asList(makeRow(1, 1), makeRow(2, 2));
    List<InputRow> list2 = Arrays.asList(makeRow(3, 3), makeRow(4, 4), makeRow(5, 5));
    FirehoseFactory combiningFactory = new CombiningFirehoseFactory(Arrays.<FirehoseFactory>asList(new ListFirehoseFactory(list1), new ListFirehoseFactory(list2)));
    final Firehose firehose = combiningFactory.connect(null);
    for (int i = 1; i < 6; i++) {
        Assert.assertTrue(firehose.hasMore());
        final InputRow inputRow = firehose.nextRow();
        Assert.assertEquals(i, inputRow.getTimestampFromEpoch());
        Assert.assertEquals(i, inputRow.getFloatMetric("test"), 0);
    }
    Assert.assertFalse(firehose.hasMore());
}
Also used : CombiningFirehoseFactory(io.druid.segment.realtime.firehose.CombiningFirehoseFactory) FirehoseFactory(io.druid.data.input.FirehoseFactory) Firehose(io.druid.data.input.Firehose) InputRow(io.druid.data.input.InputRow) CombiningFirehoseFactory(io.druid.segment.realtime.firehose.CombiningFirehoseFactory) Test(org.junit.Test)

Example 3 with FirehoseFactory

use of io.druid.data.input.FirehoseFactory in project druid by druid-io.

the class IndexTask method run.

@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
    final boolean determineIntervals = !ingestionSchema.getDataSchema().getGranularitySpec().bucketIntervals().isPresent();
    final FirehoseFactory delegateFirehoseFactory = ingestionSchema.getIOConfig().getFirehoseFactory();
    if (delegateFirehoseFactory instanceof IngestSegmentFirehoseFactory) {
        // pass toolbox to Firehose
        ((IngestSegmentFirehoseFactory) delegateFirehoseFactory).setTaskToolbox(toolbox);
    }
    final FirehoseFactory firehoseFactory;
    if (ingestionSchema.getIOConfig().isSkipFirehoseCaching() || delegateFirehoseFactory instanceof ReplayableFirehoseFactory) {
        firehoseFactory = delegateFirehoseFactory;
    } else {
        firehoseFactory = new ReplayableFirehoseFactory(delegateFirehoseFactory, ingestionSchema.getTuningConfig().isReportParseExceptions(), null, null, smileMapper);
    }
    final Map<Interval, List<ShardSpec>> shardSpecs = determineShardSpecs(toolbox, firehoseFactory);
    final String version;
    final DataSchema dataSchema;
    if (determineIntervals) {
        Interval interval = JodaUtils.umbrellaInterval(shardSpecs.keySet());
        TaskLock lock = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));
        version = lock.getVersion();
        dataSchema = ingestionSchema.getDataSchema().withGranularitySpec(ingestionSchema.getDataSchema().getGranularitySpec().withIntervals(JodaUtils.condenseIntervals(shardSpecs.keySet())));
    } else {
        version = Iterables.getOnlyElement(getTaskLocks(toolbox)).getVersion();
        dataSchema = ingestionSchema.getDataSchema();
    }
    if (generateAndPublishSegments(toolbox, dataSchema, shardSpecs, version, firehoseFactory)) {
        return TaskStatus.success(getId());
    } else {
        return TaskStatus.failure(getId());
    }
}
Also used : IngestSegmentFirehoseFactory(io.druid.indexing.firehose.IngestSegmentFirehoseFactory) DataSchema(io.druid.segment.indexing.DataSchema) TaskLock(io.druid.indexing.common.TaskLock) IngestSegmentFirehoseFactory(io.druid.indexing.firehose.IngestSegmentFirehoseFactory) ReplayableFirehoseFactory(io.druid.segment.realtime.firehose.ReplayableFirehoseFactory) FirehoseFactory(io.druid.data.input.FirehoseFactory) LockAcquireAction(io.druid.indexing.common.actions.LockAcquireAction) ReplayableFirehoseFactory(io.druid.segment.realtime.firehose.ReplayableFirehoseFactory) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Interval(org.joda.time.Interval)

Example 4 with FirehoseFactory

use of io.druid.data.input.FirehoseFactory in project druid by druid-io.

the class RealtimeIndexTask method run.

@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
    runThread = Thread.currentThread();
    if (this.plumber != null) {
        throw new IllegalStateException("WTF?!? run with non-null plumber??!");
    }
    boolean normalExit = true;
    // It would be nice to get the PlumberSchool in the constructor.  Although that will need jackson injectables for
    // stuff like the ServerView, which seems kind of odd?  Perhaps revisit this when Guice has been introduced.
    final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(this, toolbox);
    // NOTE: We talk to the coordinator in various places in the plumber and we could be more robust to issues
    // with the coordinator.  Right now, we'll block/throw in whatever thread triggered the coordinator behavior,
    // which will typically be either the main data processing loop or the persist thread.
    // Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
    final DataSegmentAnnouncer lockingSegmentAnnouncer = new DataSegmentAnnouncer() {

        @Override
        public void announceSegment(final DataSegment segment) throws IOException {
            // Side effect: Calling announceSegment causes a lock to be acquired
            toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
            toolbox.getSegmentAnnouncer().announceSegment(segment);
        }

        @Override
        public void unannounceSegment(final DataSegment segment) throws IOException {
            try {
                toolbox.getSegmentAnnouncer().unannounceSegment(segment);
            } finally {
                toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
            }
        }

        @Override
        public void announceSegments(Iterable<DataSegment> segments) throws IOException {
            // Side effect: Calling announceSegments causes locks to be acquired
            for (DataSegment segment : segments) {
                toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
            }
            toolbox.getSegmentAnnouncer().announceSegments(segments);
        }

        @Override
        public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
            try {
                toolbox.getSegmentAnnouncer().unannounceSegments(segments);
            } finally {
                for (DataSegment segment : segments) {
                    toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
                }
            }
        }

        @Override
        public boolean isAnnounced(DataSegment segment) {
            return toolbox.getSegmentAnnouncer().isAnnounced(segment);
        }
    };
    // NOTE: getVersion will block if there is lock contention, which will block plumber.getSink
    // NOTE: (and thus the firehose)
    // Shouldn't usually happen, since we don't expect people to submit tasks that intersect with the
    // realtime window, but if they do it can be problematic. If we decide to care, we can use more threads in
    // the plumber such that waiting for the coordinator doesn't block data processing.
    final VersioningPolicy versioningPolicy = new VersioningPolicy() {

        @Override
        public String getVersion(final Interval interval) {
            try {
                // Side effect: Calling getVersion causes a lock to be acquired
                final TaskLock myLock = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));
                return myLock.getVersion();
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
        }
    };
    DataSchema dataSchema = spec.getDataSchema();
    RealtimeIOConfig realtimeIOConfig = spec.getIOConfig();
    RealtimeTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(new File(toolbox.getTaskWorkDir(), "persist")).withVersioningPolicy(versioningPolicy);
    final FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, tuningConfig);
    this.metrics = fireDepartment.getMetrics();
    final RealtimeMetricsMonitor metricsMonitor = new RealtimeMetricsMonitor(ImmutableList.of(fireDepartment), ImmutableMap.of(DruidMetrics.TASK_ID, new String[] { getId() }));
    this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();
    // NOTE: This pusher selects path based purely on global configuration and the DataSegment, which means
    // NOTE: that redundant realtime tasks will upload to the same location. This can cause index.zip
    // NOTE: (partitionNum_index.zip for HDFS data storage) and descriptor.json (partitionNum_descriptor.json for
    // NOTE: HDFS data storage) to mismatch, or it can cause historical nodes to load different instances of
    // NOTE: the "same" segment.
    final PlumberSchool plumberSchool = new RealtimePlumberSchool(toolbox.getEmitter(), toolbox.getQueryRunnerFactoryConglomerate(), toolbox.getSegmentPusher(), lockingSegmentAnnouncer, segmentPublisher, toolbox.getSegmentHandoffNotifierFactory(), toolbox.getQueryExecutorService(), toolbox.getIndexMerger(), toolbox.getIndexMergerV9(), toolbox.getIndexIO(), toolbox.getCache(), toolbox.getCacheConfig(), toolbox.getObjectMapper());
    this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, metrics);
    Supplier<Committer> committerSupplier = null;
    try {
        plumber.startJob();
        // Set up metrics emission
        toolbox.getMonitorScheduler().addMonitor(metricsMonitor);
        // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
        final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
        final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
        // Skip connecting firehose if we've been stopped before we got started.
        synchronized (this) {
            if (!gracefullyStopped) {
                firehose = firehoseFactory.connect(spec.getDataSchema().getParser());
                committerSupplier = Committers.supplierFromFirehose(firehose);
            }
        }
        // Time to read data!
        while (firehose != null && (!gracefullyStopped || firehoseDrainableByClosing) && firehose.hasMore()) {
            Plumbers.addNextRow(committerSupplier, firehose, plumber, tuningConfig.isReportParseExceptions(), metrics);
        }
    } catch (Throwable e) {
        normalExit = false;
        log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
        throw e;
    } finally {
        if (normalExit) {
            try {
                // Persist if we had actually started.
                if (firehose != null) {
                    log.info("Persisting remaining data.");
                    final Committer committer = committerSupplier.get();
                    final CountDownLatch persistLatch = new CountDownLatch(1);
                    plumber.persist(new Committer() {

                        @Override
                        public Object getMetadata() {
                            return committer.getMetadata();
                        }

                        @Override
                        public void run() {
                            try {
                                committer.run();
                            } finally {
                                persistLatch.countDown();
                            }
                        }
                    });
                    persistLatch.await();
                }
                if (gracefullyStopped) {
                    log.info("Gracefully stopping.");
                } else {
                    log.info("Finishing the job.");
                    synchronized (this) {
                        if (gracefullyStopped) {
                            // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
                            log.info("Gracefully stopping.");
                        } else {
                            finishingJob = true;
                        }
                    }
                    if (finishingJob) {
                        plumber.finishJob();
                    }
                }
            } catch (InterruptedException e) {
                log.debug(e, "Interrupted while finishing the job");
            } catch (Exception e) {
                log.makeAlert(e, "Failed to finish realtime task").emit();
                throw e;
            } finally {
                if (firehose != null) {
                    CloseQuietly.close(firehose);
                }
                toolbox.getMonitorScheduler().removeMonitor(metricsMonitor);
            }
        }
    }
    log.info("Job done!");
    return TaskStatus.success(getId());
}
Also used : RealtimeIOConfig(io.druid.segment.indexing.RealtimeIOConfig) DataSegmentAnnouncer(io.druid.server.coordination.DataSegmentAnnouncer) EventReceiverFirehoseFactory(io.druid.segment.realtime.firehose.EventReceiverFirehoseFactory) ClippedFirehoseFactory(io.druid.segment.realtime.firehose.ClippedFirehoseFactory) TimedShutoffFirehoseFactory(io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory) FirehoseFactory(io.druid.data.input.FirehoseFactory) DataSegment(io.druid.timeline.DataSegment) FireDepartment(io.druid.segment.realtime.FireDepartment) SegmentPublisher(io.druid.segment.realtime.SegmentPublisher) TaskLock(io.druid.indexing.common.TaskLock) LockAcquireAction(io.druid.indexing.common.actions.LockAcquireAction) IOException(java.io.IOException) RealtimePlumberSchool(io.druid.segment.realtime.plumber.RealtimePlumberSchool) PlumberSchool(io.druid.segment.realtime.plumber.PlumberSchool) RealtimeTuningConfig(io.druid.segment.indexing.RealtimeTuningConfig) CountDownLatch(java.util.concurrent.CountDownLatch) LockReleaseAction(io.druid.indexing.common.actions.LockReleaseAction) IOException(java.io.IOException) DataSchema(io.druid.segment.indexing.DataSchema) VersioningPolicy(io.druid.segment.realtime.plumber.VersioningPolicy) RealtimePlumberSchool(io.druid.segment.realtime.plumber.RealtimePlumberSchool) RealtimeMetricsMonitor(io.druid.segment.realtime.RealtimeMetricsMonitor) Committer(io.druid.data.input.Committer) File(java.io.File) Interval(org.joda.time.Interval)

Example 5 with FirehoseFactory

use of io.druid.data.input.FirehoseFactory in project druid by druid-io.

the class RealtimeIndexTaskTest method makeRealtimeTask.

private RealtimeIndexTask makeRealtimeTask(final String taskId, boolean reportParseExceptions, long handoffTimeout) {
    ObjectMapper objectMapper = new DefaultObjectMapper();
    DataSchema dataSchema = new DataSchema("test_ds", null, new AggregatorFactory[] { new CountAggregatorFactory("rows"), new LongSumAggregatorFactory("met1", "met1") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), objectMapper);
    RealtimeIOConfig realtimeIOConfig = new RealtimeIOConfig(new TestFirehoseFactory(), null, null);
    RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(1000, new Period("P1Y"), new Period("PT10M"), null, null, new ServerTimeRejectionPolicyFactory(), null, null, null, buildV9Directly, 0, 0, reportParseExceptions, handoffTimeout);
    return new RealtimeIndexTask(taskId, null, new FireDepartment(dataSchema, realtimeIOConfig, realtimeTuningConfig), null) {

        @Override
        protected boolean isFirehoseDrainableByClosing(FirehoseFactory firehoseFactory) {
            return true;
        }
    };
}
Also used : RealtimeIOConfig(io.druid.segment.indexing.RealtimeIOConfig) FirehoseFactory(io.druid.data.input.FirehoseFactory) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) Period(org.joda.time.Period) RealtimeTuningConfig(io.druid.segment.indexing.RealtimeTuningConfig) DataSchema(io.druid.segment.indexing.DataSchema) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) FireDepartment(io.druid.segment.realtime.FireDepartment) ServerTimeRejectionPolicyFactory(io.druid.segment.realtime.plumber.ServerTimeRejectionPolicyFactory) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Aggregations

FirehoseFactory (io.druid.data.input.FirehoseFactory)5 DataSchema (io.druid.segment.indexing.DataSchema)4 RealtimeIOConfig (io.druid.segment.indexing.RealtimeIOConfig)3 RealtimeTuningConfig (io.druid.segment.indexing.RealtimeTuningConfig)3 Interval (org.joda.time.Interval)3 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2 Firehose (io.druid.data.input.Firehose)2 TaskLock (io.druid.indexing.common.TaskLock)2 LockAcquireAction (io.druid.indexing.common.actions.LockAcquireAction)2 DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper)2 CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory)2 LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory)2 UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec)2 FireDepartment (io.druid.segment.realtime.FireDepartment)2 PlumberSchool (io.druid.segment.realtime.plumber.PlumberSchool)2 IOException (java.io.IOException)2 CountDownLatch (java.util.concurrent.CountDownLatch)2 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 Committer (io.druid.data.input.Committer)1