Search in sources :

Example 1 with Sink

use of io.druid.segment.realtime.plumber.Sink in project druid by druid-io.

the class AppenderatorImpl method add.

@Override
public int add(final SegmentIdentifier identifier, final InputRow row, final Supplier<Committer> committerSupplier) throws IndexSizeExceededException, SegmentNotWritableException {
    if (!identifier.getDataSource().equals(schema.getDataSource())) {
        throw new IAE("Expected dataSource[%s] but was asked to insert row for dataSource[%s]?!", schema.getDataSource(), identifier.getDataSource());
    }
    final Sink sink = getOrCreateSink(identifier);
    metrics.reportMessageMaxTimestamp(row.getTimestampFromEpoch());
    final int sinkRowsInMemoryBeforeAdd = sink.getNumRowsInMemory();
    final int sinkRowsInMemoryAfterAdd;
    try {
        sinkRowsInMemoryAfterAdd = sink.add(row);
    } catch (IndexSizeExceededException e) {
        // Uh oh, we can't do anything about this! We can't persist (commit metadata would be out of sync) and we
        // can't add the row (it just failed). This should never actually happen, though, because we check
        // sink.canAddRow after returning from add.
        log.error(e, "Sink for segment[%s] was unexpectedly full!", identifier);
        throw e;
    }
    if (sinkRowsInMemoryAfterAdd < 0) {
        throw new SegmentNotWritableException("Attempt to add row to swapped-out sink for segment[%s].", identifier);
    }
    rowsCurrentlyInMemory.addAndGet(sinkRowsInMemoryAfterAdd - sinkRowsInMemoryBeforeAdd);
    if (!sink.canAppendRow() || System.currentTimeMillis() > nextFlush || rowsCurrentlyInMemory.get() >= tuningConfig.getMaxRowsInMemory()) {
        // persistAll clears rowsCurrentlyInMemory, no need to update it.
        persistAll(committerSupplier.get());
    }
    return sink.getNumRows();
}
Also used : Sink(io.druid.segment.realtime.plumber.Sink) IAE(io.druid.java.util.common.IAE) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException)

Example 2 with Sink

use of io.druid.segment.realtime.plumber.Sink in project druid by druid-io.

the class AppenderatorImpl method getOrCreateSink.

private Sink getOrCreateSink(final SegmentIdentifier identifier) {
    Sink retVal = sinks.get(identifier);
    if (retVal == null) {
        retVal = new Sink(identifier.getInterval(), schema, identifier.getShardSpec(), identifier.getVersion(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions());
        try {
            segmentAnnouncer.announceSegment(retVal.getSegment());
        } catch (IOException e) {
            log.makeAlert(e, "Failed to announce new segment[%s]", schema.getDataSource()).addData("interval", retVal.getInterval()).emit();
        }
        sinks.put(identifier, retVal);
        metrics.setSinkCount(sinks.size());
        sinkTimeline.add(retVal.getInterval(), retVal.getVersion(), identifier.getShardSpec().createChunk(retVal));
    }
    return retVal;
}
Also used : Sink(io.druid.segment.realtime.plumber.Sink) IOException(java.io.IOException)

Example 3 with Sink

use of io.druid.segment.realtime.plumber.Sink in project druid by druid-io.

the class AppenderatorImpl method bootstrapSinksFromDisk.

/**
   * Populate "sinks" and "sinkTimeline" with committed segments, and announce them with the segmentAnnouncer.
   *
   * @return persisted commit metadata
   */
private Object bootstrapSinksFromDisk() {
    Preconditions.checkState(sinks.isEmpty(), "Already bootstrapped?!");
    final File baseDir = tuningConfig.getBasePersistDirectory();
    if (!baseDir.exists()) {
        return null;
    }
    final File[] files = baseDir.listFiles();
    if (files == null) {
        return null;
    }
    final File commitFile = computeCommitFile();
    final Committed committed;
    try {
        if (commitFile.exists()) {
            committed = objectMapper.readValue(commitFile, Committed.class);
        } else {
            committed = Committed.nil();
        }
    } catch (Exception e) {
        throw new ISE(e, "Failed to read commitFile: %s", commitFile);
    }
    log.info("Loading sinks from[%s]: %s", baseDir, committed.getHydrants().keySet());
    for (File sinkDir : files) {
        final File identifierFile = new File(sinkDir, IDENTIFIER_FILE_NAME);
        if (!identifierFile.isFile()) {
            // No identifier in this sinkDir; it must not actually be a sink directory. Skip it.
            continue;
        }
        try {
            final SegmentIdentifier identifier = objectMapper.readValue(new File(sinkDir, "identifier.json"), SegmentIdentifier.class);
            final int committedHydrants = committed.getCommittedHydrants(identifier.getIdentifierAsString());
            if (committedHydrants <= 0) {
                log.info("Removing uncommitted sink at [%s]", sinkDir);
                FileUtils.deleteDirectory(sinkDir);
                continue;
            }
            // To avoid reading and listing of "merged" dir and other special files
            final File[] sinkFiles = sinkDir.listFiles(new FilenameFilter() {

                @Override
                public boolean accept(File dir, String fileName) {
                    return !(Ints.tryParse(fileName) == null);
                }
            });
            Arrays.sort(sinkFiles, new Comparator<File>() {

                @Override
                public int compare(File o1, File o2) {
                    return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName()));
                }
            });
            List<FireHydrant> hydrants = Lists.newArrayList();
            for (File hydrantDir : sinkFiles) {
                final int hydrantNumber = Integer.parseInt(hydrantDir.getName());
                if (hydrantNumber >= committedHydrants) {
                    log.info("Removing uncommitted segment at [%s]", hydrantDir);
                    FileUtils.deleteDirectory(hydrantDir);
                } else {
                    log.info("Loading previously persisted segment at [%s]", hydrantDir);
                    if (hydrantNumber != hydrants.size()) {
                        throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
                    }
                    hydrants.add(new FireHydrant(new QueryableIndexSegment(identifier.getIdentifierAsString(), indexIO.loadIndex(hydrantDir)), hydrantNumber));
                }
            }
            // Make sure we loaded enough hydrants.
            if (committedHydrants != hydrants.size()) {
                throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
            }
            Sink currSink = new Sink(identifier.getInterval(), schema, identifier.getShardSpec(), identifier.getVersion(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions(), hydrants);
            sinks.put(identifier, currSink);
            sinkTimeline.add(currSink.getInterval(), currSink.getVersion(), identifier.getShardSpec().createChunk(currSink));
            segmentAnnouncer.announceSegment(currSink.getSegment());
        } catch (IOException e) {
            log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource()).addData("sinkDir", sinkDir).emit();
        }
    }
    // Make sure we loaded all committed sinks.
    final Set<String> loadedSinks = Sets.newHashSet(Iterables.transform(sinks.keySet(), new Function<SegmentIdentifier, String>() {

        @Override
        public String apply(SegmentIdentifier input) {
            return input.getIdentifierAsString();
        }
    }));
    final Set<String> missingSinks = Sets.difference(committed.getHydrants().keySet(), loadedSinks);
    if (!missingSinks.isEmpty()) {
        throw new ISE("Missing committed sinks [%s]", Joiner.on(", ").join(missingSinks));
    }
    return committed.getMetadata();
}
Also used : QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) IOException(java.io.IOException) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) FilenameFilter(java.io.FilenameFilter) Function(com.google.common.base.Function) Sink(io.druid.segment.realtime.plumber.Sink) ISE(io.druid.java.util.common.ISE) FireHydrant(io.druid.segment.realtime.FireHydrant) File(java.io.File)

Example 4 with Sink

use of io.druid.segment.realtime.plumber.Sink in project druid by druid-io.

the class AppenderatorImpl method persistAll.

@Override
public ListenableFuture<Object> persistAll(final Committer committer) {
    // Submit persistAll task to the persistExecutor
    final Map<SegmentIdentifier, Integer> commitHydrants = Maps.newHashMap();
    final List<Pair<FireHydrant, SegmentIdentifier>> indexesToPersist = Lists.newArrayList();
    final Set<SegmentIdentifier> identifiers = sinks.keySet();
    for (SegmentIdentifier identifier : identifiers) {
        final Sink sink = sinks.get(identifier);
        final List<FireHydrant> hydrants = Lists.newArrayList(sink);
        commitHydrants.put(identifier, hydrants.size());
        final int limit = sink.isWritable() ? hydrants.size() - 1 : hydrants.size();
        for (FireHydrant hydrant : hydrants.subList(0, limit)) {
            if (!hydrant.hasSwapped()) {
                log.info("Hydrant[%s] hasn't persisted yet, persisting. Segment[%s]", hydrant, identifier);
                indexesToPersist.add(Pair.of(hydrant, identifier));
            }
        }
        if (sink.swappable()) {
            indexesToPersist.add(Pair.of(sink.swap(), identifier));
        }
    }
    log.info("Submitting persist runnable for dataSource[%s]", schema.getDataSource());
    final String threadName = String.format("%s-incremental-persist", schema.getDataSource());
    final Object commitMetadata = committer.getMetadata();
    final Stopwatch runExecStopwatch = Stopwatch.createStarted();
    final Stopwatch persistStopwatch = Stopwatch.createStarted();
    final ListenableFuture<Object> future = persistExecutor.submit(new ThreadRenamingCallable<Object>(threadName) {

        @Override
        public Object doCall() {
            try {
                for (Pair<FireHydrant, SegmentIdentifier> pair : indexesToPersist) {
                    metrics.incrementRowOutputCount(persistHydrant(pair.lhs, pair.rhs));
                }
                log.info("Committing metadata[%s] for sinks[%s].", commitMetadata, Joiner.on(", ").join(Iterables.transform(commitHydrants.entrySet(), new Function<Map.Entry<SegmentIdentifier, Integer>, String>() {

                    @Override
                    public String apply(Map.Entry<SegmentIdentifier, Integer> entry) {
                        return String.format("%s:%d", entry.getKey().getIdentifierAsString(), entry.getValue());
                    }
                })));
                committer.run();
                objectMapper.writeValue(computeCommitFile(), Committed.create(commitHydrants, commitMetadata));
                return commitMetadata;
            } catch (Exception e) {
                metrics.incrementFailedPersists();
                throw Throwables.propagate(e);
            } finally {
                metrics.incrementNumPersists();
                metrics.incrementPersistTimeMillis(persistStopwatch.elapsed(TimeUnit.MILLISECONDS));
                persistStopwatch.stop();
            }
        }
    });
    final long startDelay = runExecStopwatch.elapsed(TimeUnit.MILLISECONDS);
    metrics.incrementPersistBackPressureMillis(startDelay);
    if (startDelay > WARN_DELAY) {
        log.warn("Ingestion was throttled for [%,d] millis because persists were pending.", startDelay);
    }
    runExecStopwatch.stop();
    resetNextFlush();
    // NB: The rows are still in memory until they're done persisting, but we only count rows in active indexes.
    rowsCurrentlyInMemory.set(0);
    return future;
}
Also used : Stopwatch(com.google.common.base.Stopwatch) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Function(com.google.common.base.Function) Sink(io.druid.segment.realtime.plumber.Sink) FireHydrant(io.druid.segment.realtime.FireHydrant) Map(java.util.Map) Pair(io.druid.java.util.common.Pair)

Example 5 with Sink

use of io.druid.segment.realtime.plumber.Sink in project druid by druid-io.

the class RealtimeManagerTest method setUp.

@Before
public void setUp() throws Exception {
    final List<TestInputRowHolder> rows = Arrays.asList(makeRow(new DateTime("9000-01-01").getMillis()), makeRow(new ParseException("parse error")), null, makeRow(new DateTime().getMillis()));
    ObjectMapper jsonMapper = new DefaultObjectMapper();
    schema = new DataSchema("test", null, new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper);
    schema2 = new DataSchema("testV2", null, new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper);
    RealtimeIOConfig ioConfig = new RealtimeIOConfig(new FirehoseFactory() {

        @Override
        public Firehose connect(InputRowParser parser) throws IOException {
            return new TestFirehose(rows.iterator());
        }
    }, new PlumberSchool() {

        @Override
        public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
            return plumber;
        }
    }, null);
    RealtimeIOConfig ioConfig2 = new RealtimeIOConfig(null, new PlumberSchool() {

        @Override
        public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
            return plumber2;
        }
    }, new FirehoseFactoryV2() {

        @Override
        public FirehoseV2 connect(InputRowParser parser, Object arg1) throws IOException, ParseException {
            return new TestFirehoseV2(rows.iterator());
        }
    });
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(1, new Period("P1Y"), null, null, null, null, null, null, null, null, 0, 0, null, null);
    plumber = new TestPlumber(new Sink(new Interval("0/P5000Y"), schema, tuningConfig.getShardSpec(), new DateTime().toString(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions()));
    realtimeManager = new RealtimeManager(Arrays.<FireDepartment>asList(new FireDepartment(schema, ioConfig, tuningConfig)), null);
    plumber2 = new TestPlumber(new Sink(new Interval("0/P5000Y"), schema2, tuningConfig.getShardSpec(), new DateTime().toString(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions()));
    realtimeManager2 = new RealtimeManager(Arrays.<FireDepartment>asList(new FireDepartment(schema2, ioConfig2, tuningConfig)), null);
    tuningConfig_0 = new RealtimeTuningConfig(1, new Period("P1Y"), null, null, null, null, null, new LinearShardSpec(0), null, null, 0, 0, null, null);
    tuningConfig_1 = new RealtimeTuningConfig(1, new Period("P1Y"), null, null, null, null, null, new LinearShardSpec(1), null, null, 0, 0, null, null);
    schema3 = new DataSchema("testing", null, new AggregatorFactory[] { new CountAggregatorFactory("ignore") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper);
    FireDepartment department_0 = new FireDepartment(schema3, ioConfig, tuningConfig_0);
    FireDepartment department_1 = new FireDepartment(schema3, ioConfig2, tuningConfig_1);
    QueryRunnerFactoryConglomerate conglomerate = new QueryRunnerFactoryConglomerate() {

        @Override
        public <T, QueryType extends Query<T>> QueryRunnerFactory<T, QueryType> findFactory(QueryType query) {
            return factory;
        }
    };
    chiefStartedLatch = new CountDownLatch(2);
    RealtimeManager.FireChief fireChief_0 = new RealtimeManager.FireChief(department_0, conglomerate) {

        @Override
        public void run() {
            super.initPlumber();
            chiefStartedLatch.countDown();
        }
    };
    RealtimeManager.FireChief fireChief_1 = new RealtimeManager.FireChief(department_1, conglomerate) {

        @Override
        public void run() {
            super.initPlumber();
            chiefStartedLatch.countDown();
        }
    };
    realtimeManager3 = new RealtimeManager(Arrays.asList(department_0, department_1), conglomerate, ImmutableMap.<String, Map<Integer, RealtimeManager.FireChief>>of("testing", ImmutableMap.of(0, fireChief_0, 1, fireChief_1)));
    startFireChiefWithPartitionNum(fireChief_0, 0);
    startFireChiefWithPartitionNum(fireChief_1, 1);
}
Also used : FirehoseV2(io.druid.data.input.FirehoseV2) RealtimeIOConfig(io.druid.segment.indexing.RealtimeIOConfig) BaseQuery(io.druid.query.BaseQuery) Query(io.druid.query.Query) GroupByQuery(io.druid.query.groupby.GroupByQuery) FirehoseFactory(io.druid.data.input.FirehoseFactory) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) DateTime(org.joda.time.DateTime) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) QueryRunnerFactoryConglomerate(io.druid.query.QueryRunnerFactoryConglomerate) Sink(io.druid.segment.realtime.plumber.Sink) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Firehose(io.druid.data.input.Firehose) Period(org.joda.time.Period) IOException(java.io.IOException) PlumberSchool(io.druid.segment.realtime.plumber.PlumberSchool) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) RealtimeTuningConfig(io.druid.segment.indexing.RealtimeTuningConfig) CountDownLatch(java.util.concurrent.CountDownLatch) DataSchema(io.druid.segment.indexing.DataSchema) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) Plumber(io.druid.segment.realtime.plumber.Plumber) ParseException(io.druid.java.util.common.parsers.ParseException) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) InputRowParser(io.druid.data.input.impl.InputRowParser) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) FirehoseFactoryV2(io.druid.data.input.FirehoseFactoryV2) Interval(org.joda.time.Interval) Before(org.junit.Before)

Aggregations

Sink (io.druid.segment.realtime.plumber.Sink)9 IOException (java.io.IOException)5 IndexSizeExceededException (io.druid.segment.incremental.IndexSizeExceededException)4 FireHydrant (io.druid.segment.realtime.FireHydrant)4 Function (com.google.common.base.Function)3 ISE (io.druid.java.util.common.ISE)3 Query (io.druid.query.Query)3 Map (java.util.Map)3 ExecutionException (java.util.concurrent.ExecutionException)3 ImmutableList (com.google.common.collect.ImmutableList)2 Pair (io.druid.java.util.common.Pair)2 Plumber (io.druid.segment.realtime.plumber.Plumber)2 File (java.io.File)2 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 Stopwatch (com.google.common.base.Stopwatch)1 Supplier (com.google.common.base.Supplier)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)1 ServiceMetricEvent (com.metamx.emitter.service.ServiceMetricEvent)1 CachingQueryRunner (io.druid.client.CachingQueryRunner)1