Example 1 with Plumber

Use of io.druid.segment.realtime.plumber.Plumber in project druid by druid-io.

From class TaskAnnouncementTest, method testBackwardsCompatibleSerde:

@Test
public void testBackwardsCompatibleSerde() throws Exception {
    final Task task = new RealtimeIndexTask(
        "theid",
        new TaskResource("rofl", 2),
        new FireDepartment(
            new DataSchema("foo", null, new AggregatorFactory[0], null, new DefaultObjectMapper()),
            new RealtimeIOConfig(
                new LocalFirehoseFactory(new File("lol"), "rofl", null),
                new PlumberSchool() {
                    @Override
                    public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
                        // Serde test: the plumber is never actually used.
                        return null;
                    }
                },
                null // no FirehoseFactoryV2
            ),
            null // no tuning config
        ),
        null // no task context
    );
    final TaskStatus status = TaskStatus.running(task.getId());
    final TaskAnnouncement announcement = TaskAnnouncement.create(task, status, TaskLocation.unknown());
    final String statusJson = jsonMapper.writeValueAsString(status);
    final String announcementJson = jsonMapper.writeValueAsString(announcement);
    final TaskStatus statusFromStatus = jsonMapper.readValue(statusJson, TaskStatus.class);
    final TaskStatus statusFromAnnouncement = jsonMapper.readValue(announcementJson, TaskStatus.class);
    final TaskAnnouncement announcementFromStatus = jsonMapper.readValue(statusJson, TaskAnnouncement.class);
    final TaskAnnouncement announcementFromAnnouncement = jsonMapper.readValue(announcementJson, TaskAnnouncement.class);
    Assert.assertEquals("theid", statusFromStatus.getId());
    Assert.assertEquals("theid", statusFromAnnouncement.getId());
    Assert.assertEquals("theid", announcementFromStatus.getTaskStatus().getId());
    Assert.assertEquals("theid", announcementFromAnnouncement.getTaskStatus().getId());
    Assert.assertEquals("theid", announcementFromStatus.getTaskResource().getAvailabilityGroup());
    Assert.assertEquals("rofl", announcementFromAnnouncement.getTaskResource().getAvailabilityGroup());
    Assert.assertEquals(1, announcementFromStatus.getTaskResource().getRequiredCapacity());
    Assert.assertEquals(2, announcementFromAnnouncement.getTaskResource().getRequiredCapacity());
}
Imports used:

import io.druid.indexing.common.TaskLocation;
import io.druid.indexing.common.TaskStatus;
import io.druid.indexing.common.task.RealtimeIndexTask;
import io.druid.indexing.common.task.Task;
import io.druid.indexing.common.task.TaskResource;
import io.druid.indexing.worker.TaskAnnouncement;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.segment.indexing.DataSchema;
import io.druid.segment.indexing.RealtimeIOConfig;
import io.druid.segment.indexing.RealtimeTuningConfig;
import io.druid.segment.realtime.FireDepartment;
import io.druid.segment.realtime.FireDepartmentMetrics;
import io.druid.segment.realtime.firehose.LocalFirehoseFactory;
import io.druid.segment.realtime.plumber.Plumber;
import io.druid.segment.realtime.plumber.PlumberSchool;
import java.io.File;
import org.junit.Assert;
import org.junit.Test;
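
The backwards compatibility exercised above hinges on TaskStatus and TaskAnnouncement sharing enough JSON shape that a payload written as one type can be read back as the other, with defaults filling the gaps (the test shows the availability group falling back to the task id and the required capacity to 1). A minimal standalone sketch of the same round-trip pattern in plain Jackson; Status and Announcement here are hypothetical stand-ins, not Druid's types:

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;

public class RoundTripSketch {

    // Hypothetical "old" payload: just an id.
    public static class Status {
        public String id;
    }

    // Hypothetical "new" payload: a superset of the old one, so old JSON still binds.
    public static class Announcement {
        public String id;
        public Integer capacity; // absent from old JSON; stays null until a default is applied
    }

    public static void main(String[] args) throws Exception {
        final ObjectMapper mapper = new ObjectMapper()
            .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

        // Old-style JSON read as the new type: the missing field is simply null.
        final Status status = new Status();
        status.id = "theid";
        final Announcement fromOld = mapper.readValue(mapper.writeValueAsString(status), Announcement.class);
        System.out.println(fromOld.id + " / " + fromOld.capacity); // theid / null

        // New-style JSON read as the old type: the extra field is ignored.
        final Announcement ann = new Announcement();
        ann.id = "theid";
        ann.capacity = 2;
        final Status fromNew = mapper.readValue(mapper.writeValueAsString(ann), Status.class);
        System.out.println(fromNew.id); // theid
    }
}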

Example 2 with Plumber

Use of io.druid.segment.realtime.plumber.Plumber in project druid by druid-io.

From class RealtimeManagerTest, method setUp:

@Before
public void setUp() throws Exception {
    final List<TestInputRowHolder> rows = Arrays.asList(
        makeRow(new DateTime("9000-01-01").getMillis()), // row timestamped far in the future
        makeRow(new ParseException("parse error")),      // row that fails to parse
        null,                                            // null row
        makeRow(new DateTime().getMillis())              // well-formed current row
    );
    ObjectMapper jsonMapper = new DefaultObjectMapper();
    schema = new DataSchema("test", null, new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper);
    schema2 = new DataSchema("testV2", null, new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper);
    RealtimeIOConfig ioConfig = new RealtimeIOConfig(new FirehoseFactory() {

        @Override
        public Firehose connect(InputRowParser parser) throws IOException {
            return new TestFirehose(rows.iterator());
        }
    }, new PlumberSchool() {

        @Override
        public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
            return plumber;
        }
    }, null);
    RealtimeIOConfig ioConfig2 = new RealtimeIOConfig(null /* no v1 firehose; this config uses the v2 factory below */, new PlumberSchool() {

        @Override
        public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
            return plumber2;
        }
    }, new FirehoseFactoryV2() {

        @Override
        public FirehoseV2 connect(InputRowParser parser, Object arg1) throws IOException, ParseException {
            return new TestFirehoseV2(rows.iterator());
        }
    });
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(1, new Period("P1Y"), null, null, null, null, null, null, null, null, 0, 0, null, null);
    plumber = new TestPlumber(new Sink(new Interval("0/P5000Y"), schema, tuningConfig.getShardSpec(), new DateTime().toString(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions()));
    realtimeManager = new RealtimeManager(Arrays.<FireDepartment>asList(new FireDepartment(schema, ioConfig, tuningConfig)), null);
    plumber2 = new TestPlumber(new Sink(new Interval("0/P5000Y"), schema2, tuningConfig.getShardSpec(), new DateTime().toString(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions()));
    realtimeManager2 = new RealtimeManager(Arrays.<FireDepartment>asList(new FireDepartment(schema2, ioConfig2, tuningConfig)), null);
    tuningConfig_0 = new RealtimeTuningConfig(1, new Period("P1Y"), null, null, null, null, null, new LinearShardSpec(0), null, null, 0, 0, null, null);
    tuningConfig_1 = new RealtimeTuningConfig(1, new Period("P1Y"), null, null, null, null, null, new LinearShardSpec(1), null, null, 0, 0, null, null);
    schema3 = new DataSchema("testing", null, new AggregatorFactory[] { new CountAggregatorFactory("ignore") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper);
    FireDepartment department_0 = new FireDepartment(schema3, ioConfig, tuningConfig_0);
    FireDepartment department_1 = new FireDepartment(schema3, ioConfig2, tuningConfig_1);
    QueryRunnerFactoryConglomerate conglomerate = new QueryRunnerFactoryConglomerate() {

        @Override
        public <T, QueryType extends Query<T>> QueryRunnerFactory<T, QueryType> findFactory(QueryType query) {
            return factory;
        }
    };
    chiefStartedLatch = new CountDownLatch(2);
    RealtimeManager.FireChief fireChief_0 = new RealtimeManager.FireChief(department_0, conglomerate) {

        @Override
        public void run() {
            super.initPlumber();
            chiefStartedLatch.countDown();
        }
    };
    RealtimeManager.FireChief fireChief_1 = new RealtimeManager.FireChief(department_1, conglomerate) {

        @Override
        public void run() {
            super.initPlumber();
            chiefStartedLatch.countDown();
        }
    };
    realtimeManager3 = new RealtimeManager(
        Arrays.asList(department_0, department_1),
        conglomerate,
        ImmutableMap.<String, Map<Integer, RealtimeManager.FireChief>>of(
            "testing",
            ImmutableMap.of(0, fireChief_0, 1, fireChief_1)
        )
    );
    startFireChiefWithPartitionNum(fireChief_0, 0);
    startFireChiefWithPartitionNum(fireChief_1, 1);
}
Imports used:

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableMap;
import io.druid.data.input.Firehose;
import io.druid.data.input.FirehoseFactory;
import io.druid.data.input.FirehoseFactoryV2;
import io.druid.data.input.FirehoseV2;
import io.druid.data.input.impl.InputRowParser;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.java.util.common.granularity.Granularities;
import io.druid.java.util.common.parsers.ParseException;
import io.druid.query.Query;
import io.druid.query.QueryRunnerFactory;
import io.druid.query.QueryRunnerFactoryConglomerate;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.CountAggregatorFactory;
import io.druid.segment.indexing.DataSchema;
import io.druid.segment.indexing.RealtimeIOConfig;
import io.druid.segment.indexing.RealtimeTuningConfig;
import io.druid.segment.indexing.granularity.UniformGranularitySpec;
import io.druid.segment.realtime.FireDepartment;
import io.druid.segment.realtime.FireDepartmentMetrics;
import io.druid.segment.realtime.RealtimeManager;
import io.druid.segment.realtime.plumber.Plumber;
import io.druid.segment.realtime.plumber.PlumberSchool;
import io.druid.segment.realtime.plumber.Sink;
import io.druid.timeline.partition.LinearShardSpec;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import org.joda.time.Period;
import org.junit.Before;
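
TestFirehose and TestFirehoseV2 are private helpers of the test class and are not shown here. For illustration, a minimal iterator-backed firehose in the same spirit; a sketch that assumes the hasMore/nextRow/commit/close contract that io.druid.data.input.Firehose has in this version of Druid:

import java.io.IOException;
import java.util.Iterator;

import io.druid.data.input.Firehose;
import io.druid.data.input.InputRow;

// A minimal in-memory Firehose that drains an iterator of rows.
public class IteratorFirehose implements Firehose {
    private final Iterator<InputRow> rows;

    public IteratorFirehose(Iterator<InputRow> rows) {
        this.rows = rows;
    }

    @Override
    public boolean hasMore() {
        return rows.hasNext();
    }

    @Override
    public InputRow nextRow() {
        return rows.next();
    }

    @Override
    public Runnable commit() {
        // commit() hands back a checkpoint action to run after a persist;
        // an in-memory iterator has nothing to checkpoint.
        return new Runnable() {
            @Override
            public void run() {
                // no-op
            }
        };
    }

    @Override
    public void close() throws IOException {
        // no resources to release
    }
}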

Example 3 with Plumber

Use of io.druid.segment.realtime.plumber.Plumber in project druid by druid-io.

From class DruidJsonValidatorTest, method testTaskValidator:

@Test
public void testTaskValidator() throws Exception {
    final ObjectMapper jsonMapper = new DefaultObjectMapper();
    for (final Module jacksonModule : new FirehoseModule().getJacksonModules()) {
        jsonMapper.registerModule(jacksonModule);
    }
    final RealtimeIndexTask task = new RealtimeIndexTask(
        null, // null id: the task will generate one
        new TaskResource("rofl", 2),
        new FireDepartment(
            new DataSchema(
                "foo",
                null,
                new AggregatorFactory[0],
                new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null),
                jsonMapper
            ),
            new RealtimeIOConfig(
                new LocalFirehoseFactory(new File("lol"), "rofl", null),
                new PlumberSchool() {
                    @Override
                    public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
                        // Validation only needs the spec to round-trip; no real plumber required.
                        return null;
                    }
                },
                null // no FirehoseFactoryV2
            ),
            new RealtimeTuningConfig(
                1, new Period("PT10M"), null, null, null, null, 1,
                NoneShardSpec.instance(), new IndexSpec(), null, 0, 0, true, null
            )
        ),
        null // no task context
    );
    File tmp = temporaryFolder.newFile("test_task.json");
    jsonMapper.writeValue(tmp, task);
    parseCommand("validator", "-f", tmp.getAbsolutePath(), "-t", "task").run();
}
Imports used:

import com.fasterxml.jackson.databind.Module;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.guice.FirehoseModule;
import io.druid.indexing.common.task.RealtimeIndexTask;
import io.druid.indexing.common.task.TaskResource;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.java.util.common.granularity.Granularities;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.segment.IndexSpec;
import io.druid.segment.indexing.DataSchema;
import io.druid.segment.indexing.RealtimeIOConfig;
import io.druid.segment.indexing.RealtimeTuningConfig;
import io.druid.segment.indexing.granularity.UniformGranularitySpec;
import io.druid.segment.realtime.FireDepartment;
import io.druid.segment.realtime.FireDepartmentMetrics;
import io.druid.segment.realtime.firehose.LocalFirehoseFactory;
import io.druid.segment.realtime.plumber.Plumber;
import io.druid.segment.realtime.plumber.PlumberSchool;
import io.druid.timeline.partition.NoneShardSpec;
import java.io.File;
import org.joda.time.Period;
import org.junit.Test;
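
The module-registration loop at the top of the test is what lets a bare ObjectMapper resolve polymorphic firehose specs such as LocalFirehoseFactory outside an injected server context. The same pattern in isolation, as a short sketch:

// Build a mapper that can (de)serialize firehose specs, mirroring the
// registration loop in the test above.
ObjectMapper mapper = new DefaultObjectMapper();
for (Module module : new FirehoseModule().getJacksonModules()) {
    mapper.registerModule(module);
}
// The mapper can now round-trip FirehoseFactory subtypes by type name,
// which is what the "validator" command relies on when reading the task file.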

Example 4 with Plumber

Use of io.druid.segment.realtime.plumber.Plumber in project druid by druid-io.

From class YeOldePlumberSchool, method findPlumber:

@Override
public Plumber findPlumber(final DataSchema schema, final RealtimeTuningConfig config, final FireDepartmentMetrics metrics) {
    // There can be only one.
    final Sink theSink = new Sink(interval, schema, config.getShardSpec(), version, config.getMaxRowsInMemory(), config.isReportParseExceptions());
    // Temporary directory to hold spilled segments.
    final File persistDir = new File(tmpSegmentDir, theSink.getSegment().getIdentifier());
    // Set of spilled segments. Will be merged at the end.
    final Set<File> spilled = Sets.newHashSet();
    // IndexMerger implementation.
    final IndexMerger theIndexMerger = config.getBuildV9Directly() ? indexMergerV9 : indexMerger;
    return new Plumber() {

        @Override
        public Object startJob() {
            return null;
        }

        @Override
        public int add(InputRow row, Supplier<Committer> committerSupplier) throws IndexSizeExceededException {
            Sink sink = getSink(row.getTimestampFromEpoch());
            if (sink == null) {
                return -1;
            }
            final int numRows = sink.add(row);
            if (!sink.canAppendRow()) {
                persist(committerSupplier.get());
            }
            return numRows;
        }

        private Sink getSink(long timestamp) {
            if (theSink.getInterval().contains(timestamp)) {
                return theSink;
            } else {
                return null;
            }
        }

        @Override
        public <T> QueryRunner<T> getQueryRunner(Query<T> query) {
            throw new UnsupportedOperationException("Don't query me, bro.");
        }

        @Override
        public void persist(Committer committer) {
            spillIfSwappable();
            committer.run();
        }

        @Override
        public void finishJob() {
            // The segment we will upload
            File fileToUpload = null;
            try {
                // User should have persisted everything by now.
                Preconditions.checkState(!theSink.swappable(), "All data must be persisted before finishing the job!");
                if (spilled.size() == 0) {
                    throw new IllegalStateException("Nothing indexed?");
                } else if (spilled.size() == 1) {
                    fileToUpload = Iterables.getOnlyElement(spilled);
                } else {
                    List<QueryableIndex> indexes = Lists.newArrayList();
                    for (final File oneSpill : spilled) {
                        indexes.add(indexIO.loadIndex(oneSpill));
                    }
                    fileToUpload = new File(tmpSegmentDir, "merged");
                    theIndexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), fileToUpload, config.getIndexSpec());
                }
                // Map merged segment so we can extract dimensions
                final QueryableIndex mappedSegment = indexIO.loadIndex(fileToUpload);
                final DataSegment segmentToUpload = theSink.getSegment().withDimensions(ImmutableList.copyOf(mappedSegment.getAvailableDimensions())).withBinaryVersion(SegmentUtils.getVersionFromDir(fileToUpload));
                dataSegmentPusher.push(fileToUpload, segmentToUpload);
                log.info("Uploaded segment[%s]", segmentToUpload.getIdentifier());
            } catch (Exception e) {
                log.warn(e, "Failed to merge and upload");
                throw Throwables.propagate(e);
            } finally {
                try {
                    if (fileToUpload != null) {
                        log.info("Deleting Index File[%s]", fileToUpload);
                        FileUtils.deleteDirectory(fileToUpload);
                    }
                } catch (IOException e) {
                    log.warn(e, "Error deleting directory[%s]", fileToUpload);
                }
            }
        }

        private void spillIfSwappable() {
            if (theSink.swappable()) {
                final FireHydrant indexToPersist = theSink.swap();
                final int rowsToPersist = indexToPersist.getIndex().size();
                final File dirToPersist = getSpillDir(indexToPersist.getCount());
                log.info("Spilling index[%d] with rows[%d] to: %s", indexToPersist.getCount(), rowsToPersist, dirToPersist);
                try {
                    theIndexMerger.persist(indexToPersist.getIndex(), dirToPersist, config.getIndexSpec());
                    indexToPersist.swapSegment(null);
                    metrics.incrementRowOutputCount(rowsToPersist);
                    spilled.add(dirToPersist);
                } catch (Exception e) {
                    log.warn(e, "Failed to spill index[%d]", indexToPersist.getCount());
                    throw Throwables.propagate(e);
                }
            }
        }

        private File getSpillDir(final int n) {
            return new File(persistDir, String.format("spill%d", n));
        }
    };
}
Imports used:

import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import io.druid.data.input.Committer;
import io.druid.data.input.InputRow;
import io.druid.query.Query;
import io.druid.query.QueryRunner;
import io.druid.segment.IndexMerger;
import io.druid.segment.QueryableIndex;
import io.druid.segment.SegmentUtils;
import io.druid.segment.incremental.IndexSizeExceededException;
import io.druid.segment.indexing.DataSchema;
import io.druid.segment.indexing.RealtimeTuningConfig;
import io.druid.segment.realtime.FireDepartmentMetrics;
import io.druid.segment.realtime.FireHydrant;
import io.druid.segment.realtime.plumber.Plumber;
import io.druid.segment.realtime.plumber.Sink;
import io.druid.timeline.DataSegment;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Set;
import org.apache.commons.io.FileUtils;
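
Read as a whole, this plumber expects a fixed lifecycle from its caller: startJob, add each row (the plumber spills to disk on its own whenever the sink stops accepting appends), one final persist, then finishJob to merge the spills and push the segment. A rough sketch of that driver loop; plumber, firehose, and committerSupplier are assumed to be constructed elsewhere:

plumber.startJob();
try {
    while (firehose.hasMore()) {
        final InputRow row = firehose.nextRow();
        if (plumber.add(row, committerSupplier) == -1) {
            // Row fell outside the sink's interval; this plumber drops it.
            continue;
        }
    }
    // All data must be persisted before finishJob(), or its
    // Preconditions.checkState(!theSink.swappable(), ...) will fire.
    plumber.persist(committerSupplier.get());
    plumber.finishJob();
} finally {
    firehose.close();
}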

Example 5 with Plumber

Use of io.druid.segment.realtime.plumber.Plumber in project druid by druid-io.

From class TaskSerdeTest, method testRealtimeIndexTaskSerde:

@Test
public void testRealtimeIndexTaskSerde() throws Exception {
    final RealtimeIndexTask task = new RealtimeIndexTask(
        null, // null id: the task generates one, which the serde round trip must preserve
        new TaskResource("rofl", 2),
        new FireDepartment(
            new DataSchema(
                "foo",
                null,
                new AggregatorFactory[0],
                new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null),
                jsonMapper
            ),
            new RealtimeIOConfig(
                new LocalFirehoseFactory(new File("lol"), "rofl", null),
                new PlumberSchool() {
                    @Override
                    public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
                        // Serde test: the plumber is never actually used.
                        return null;
                    }
                },
                null // no FirehoseFactoryV2
            ),
            new RealtimeTuningConfig(
                1, new Period("PT10M"), null, null, null, null, 1,
                NoneShardSpec.instance(), indexSpec, null, 0, 0, true, null
            )
        ),
        null // no task context
    );
    final String json = jsonMapper.writeValueAsString(task);
    // Just want to run the clock a bit to make sure the task id doesn't change
    Thread.sleep(100);
    final RealtimeIndexTask task2 = (RealtimeIndexTask) jsonMapper.readValue(json, Task.class);
    Assert.assertEquals("foo", task.getDataSource());
    Assert.assertEquals(2, task.getTaskResource().getRequiredCapacity());
    Assert.assertEquals("rofl", task.getTaskResource().getAvailabilityGroup());
    Assert.assertEquals(new Period("PT10M"), task.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod());
    Assert.assertEquals(Granularities.HOUR, task.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity());
    Assert.assertTrue(task.getRealtimeIngestionSchema().getTuningConfig().isReportParseExceptions());
    Assert.assertEquals(task.getId(), task2.getId());
    Assert.assertEquals(task.getGroupId(), task2.getGroupId());
    Assert.assertEquals(task.getDataSource(), task2.getDataSource());
    Assert.assertEquals(task.getTaskResource().getRequiredCapacity(), task2.getTaskResource().getRequiredCapacity());
    Assert.assertEquals(task.getTaskResource().getAvailabilityGroup(), task2.getTaskResource().getAvailabilityGroup());
    Assert.assertEquals(task.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod(), task2.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod());
    Assert.assertEquals(task.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity(), task2.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity());
}
Imports used:

import io.druid.indexing.common.task.RealtimeIndexTask;
import io.druid.indexing.common.task.Task;
import io.druid.indexing.common.task.TaskResource;
import io.druid.java.util.common.granularity.Granularities;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.segment.indexing.DataSchema;
import io.druid.segment.indexing.RealtimeIOConfig;
import io.druid.segment.indexing.RealtimeTuningConfig;
import io.druid.segment.indexing.granularity.UniformGranularitySpec;
import io.druid.segment.realtime.FireDepartment;
import io.druid.segment.realtime.FireDepartmentMetrics;
import io.druid.segment.realtime.firehose.LocalFirehoseFactory;
import io.druid.segment.realtime.plumber.Plumber;
import io.druid.segment.realtime.plumber.PlumberSchool;
import io.druid.timeline.partition.NoneShardSpec;
import java.io.File;
import org.joda.time.Period;
import org.junit.Assert;
import org.junit.Test;

Aggregations

Usage counts for the types shared across the examples above:

Plumber (io.druid.segment.realtime.plumber.Plumber): 5
DataSchema (io.druid.segment.indexing.DataSchema): 4
RealtimeIOConfig (io.druid.segment.indexing.RealtimeIOConfig): 4
RealtimeTuningConfig (io.druid.segment.indexing.RealtimeTuningConfig): 4
PlumberSchool (io.druid.segment.realtime.plumber.PlumberSchool): 4
File (java.io.File): 4
DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper): 3
UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec): 3
FireDepartment (io.druid.segment.realtime.FireDepartment): 3
FireDepartmentMetrics (io.druid.segment.realtime.FireDepartmentMetrics): 3
LocalFirehoseFactory (io.druid.segment.realtime.firehose.LocalFirehoseFactory): 3
Period (org.joda.time.Period): 3
Test (org.junit.Test): 3
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 2
RealtimeIndexTask (io.druid.indexing.common.task.RealtimeIndexTask): 2
TaskResource (io.druid.indexing.common.task.TaskResource): 2
Query (io.druid.query.Query): 2
Sink (io.druid.segment.realtime.plumber.Sink): 2
IOException (java.io.IOException): 2
Module (com.fasterxml.jackson.databind.Module): 1