
Example 6 with FireHydrant

Use of io.druid.segment.realtime.FireHydrant in project druid by druid-io.

The class YeOldePlumberSchool, method findPlumber.

@Override
public Plumber findPlumber(final DataSchema schema, final RealtimeTuningConfig config, final FireDepartmentMetrics metrics) {
    // There can be only one.
    final Sink theSink = new Sink(interval, schema, config.getShardSpec(), version, config.getMaxRowsInMemory(), config.isReportParseExceptions());
    // Temporary directory to hold spilled segments.
    final File persistDir = new File(tmpSegmentDir, theSink.getSegment().getIdentifier());
    // Set of spilled segments. Will be merged at the end.
    final Set<File> spilled = Sets.newHashSet();
    // IndexMerger implementation.
    final IndexMerger theIndexMerger = config.getBuildV9Directly() ? indexMergerV9 : indexMerger;
    return new Plumber() {

        @Override
        public Object startJob() {
            return null;
        }

        @Override
        public int add(InputRow row, Supplier<Committer> committerSupplier) throws IndexSizeExceededException {
            Sink sink = getSink(row.getTimestampFromEpoch());
            if (sink == null) {
                return -1;
            }
            final int numRows = sink.add(row);
            if (!sink.canAppendRow()) {
                persist(committerSupplier.get());
            }
            return numRows;
        }

        private Sink getSink(long timestamp) {
            if (theSink.getInterval().contains(timestamp)) {
                return theSink;
            } else {
                return null;
            }
        }

        @Override
        public <T> QueryRunner<T> getQueryRunner(Query<T> query) {
            throw new UnsupportedOperationException("Don't query me, bro.");
        }

        @Override
        public void persist(Committer committer) {
            spillIfSwappable();
            committer.run();
        }

        @Override
        public void finishJob() {
            // The segment we will upload
            File fileToUpload = null;
            try {
                // User should have persisted everything by now.
                Preconditions.checkState(!theSink.swappable(), "All data must be persisted before finishing the job!");
                if (spilled.size() == 0) {
                    throw new IllegalStateException("Nothing indexed?");
                } else if (spilled.size() == 1) {
                    fileToUpload = Iterables.getOnlyElement(spilled);
                } else {
                    List<QueryableIndex> indexes = Lists.newArrayList();
                    for (final File oneSpill : spilled) {
                        indexes.add(indexIO.loadIndex(oneSpill));
                    }
                    fileToUpload = new File(tmpSegmentDir, "merged");
                    theIndexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), fileToUpload, config.getIndexSpec());
                }
                // Map merged segment so we can extract dimensions
                final QueryableIndex mappedSegment = indexIO.loadIndex(fileToUpload);
                final DataSegment segmentToUpload = theSink.getSegment().withDimensions(ImmutableList.copyOf(mappedSegment.getAvailableDimensions())).withBinaryVersion(SegmentUtils.getVersionFromDir(fileToUpload));
                dataSegmentPusher.push(fileToUpload, segmentToUpload);
                log.info("Uploaded segment[%s]", segmentToUpload.getIdentifier());
            } catch (Exception e) {
                log.warn(e, "Failed to merge and upload");
                throw Throwables.propagate(e);
            } finally {
                try {
                    if (fileToUpload != null) {
                        log.info("Deleting Index File[%s]", fileToUpload);
                        FileUtils.deleteDirectory(fileToUpload);
                    }
                } catch (IOException e) {
                    log.warn(e, "Error deleting directory[%s]", fileToUpload);
                }
            }
        }

        private void spillIfSwappable() {
            if (theSink.swappable()) {
                final FireHydrant indexToPersist = theSink.swap();
                final int rowsToPersist = indexToPersist.getIndex().size();
                final File dirToPersist = getSpillDir(indexToPersist.getCount());
                log.info("Spilling index[%d] with rows[%d] to: %s", indexToPersist.getCount(), rowsToPersist, dirToPersist);
                try {
                    theIndexMerger.persist(indexToPersist.getIndex(), dirToPersist, config.getIndexSpec());
                    indexToPersist.swapSegment(null);
                    metrics.incrementRowOutputCount(rowsToPersist);
                    spilled.add(dirToPersist);
                } catch (Exception e) {
                    log.warn(e, "Failed to spill index[%d]", indexToPersist.getCount());
                    throw Throwables.propagate(e);
                }
            }
        }

        private File getSpillDir(final int n) {
            return new File(persistDir, String.format("spill%d", n));
        }
    };
}
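
For context, a minimal sketch of driving the returned Plumber, assuming school, schema, tuningConfig, metrics, and rows already exist (all hypothetical names, not from the snippet) and that the enclosing method declares throws IndexSizeExceededException:

Plumber plumber = school.findPlumber(schema, tuningConfig, metrics);
plumber.startJob();
final Committer committer = new Committer() {

    @Override
    public Object getMetadata() {
        // no commit metadata in this sketch
        return null;
    }

    @Override
    public void run() {
        // invoked once a persist has completed
    }
};
for (InputRow row : rows) {
    if (plumber.add(row, Suppliers.ofInstance(committer)) == -1) {
        // row fell outside the sink's interval and was dropped
    }
}
// Spill whatever is still in memory, then merge all spills and push the segment.
plumber.persist(committer);
plumber.finishJob();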
Also used: IndexMerger (io.druid.segment.IndexMerger) Query (io.druid.query.Query) IOException (java.io.IOException) DataSegment (io.druid.timeline.DataSegment) IndexSizeExceededException (io.druid.segment.incremental.IndexSizeExceededException) Sink (io.druid.segment.realtime.plumber.Sink) QueryableIndex (io.druid.segment.QueryableIndex) InputRow (io.druid.data.input.InputRow) Plumber (io.druid.segment.realtime.plumber.Plumber) Supplier (com.google.common.base.Supplier) ImmutableList (com.google.common.collect.ImmutableList) List (java.util.List) Committer (io.druid.data.input.Committer) FireHydrant (io.druid.segment.realtime.FireHydrant) File (java.io.File)

Example 7 with FireHydrant

Use of io.druid.segment.realtime.FireHydrant in project druid by druid-io.

The class SinkTest, method testSwap.

@Test
public void testSwap() throws Exception {
    final DataSchema schema = new DataSchema("test", null, new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null), new DefaultObjectMapper());
    final Interval interval = new Interval("2013-01-01/2013-01-02");
    final String version = new DateTime().toString();
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(100, new Period("P1Y"), null, null, null, null, null, null, null, null, 0, 0, null, null);
    final Sink sink = new Sink(interval, schema, tuningConfig.getShardSpec(), version, tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions());
    sink.add(new InputRow() {

        @Override
        public List<String> getDimensions() {
            return Lists.newArrayList();
        }

        @Override
        public long getTimestampFromEpoch() {
            return new DateTime("2013-01-01").getMillis();
        }

        @Override
        public DateTime getTimestamp() {
            return new DateTime("2013-01-01");
        }

        @Override
        public List<String> getDimension(String dimension) {
            return Lists.newArrayList();
        }

        @Override
        public float getFloatMetric(String metric) {
            return 0;
        }

        @Override
        public long getLongMetric(String metric) {
            return 0L;
        }

        @Override
        public Object getRaw(String dimension) {
            return null;
        }

        @Override
        public int compareTo(Row o) {
            return 0;
        }
    });
    FireHydrant currHydrant = sink.getCurrHydrant();
    Assert.assertEquals(new Interval("2013-01-01/PT1M"), currHydrant.getIndex().getInterval());
    FireHydrant swapHydrant = sink.swap();
    sink.add(new InputRow() {

        @Override
        public List<String> getDimensions() {
            return Lists.newArrayList();
        }

        @Override
        public long getTimestampFromEpoch() {
            return new DateTime("2013-01-01").getMillis();
        }

        @Override
        public DateTime getTimestamp() {
            return new DateTime("2013-01-01");
        }

        @Override
        public List<String> getDimension(String dimension) {
            return Lists.newArrayList();
        }

        @Override
        public float getFloatMetric(String metric) {
            return 0;
        }

        @Override
        public long getLongMetric(String metric) {
            return 0L;
        }

        @Override
        public Object getRaw(String dimension) {
            return null;
        }

        @Override
        public int compareTo(Row o) {
            return 0;
        }
    });
    Assert.assertEquals(currHydrant, swapHydrant);
    Assert.assertNotSame(currHydrant, sink.getCurrHydrant());
    Assert.assertEquals(new Interval("2013-01-01/PT1M"), sink.getCurrHydrant().getIndex().getInterval());
    Assert.assertEquals(2, Iterators.size(sink.iterator()));
}
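
Since the anonymous InputRow above only needs to supply a timestamp, a more compact sketch would use MapBasedInputRow from the same io.druid.data.input package (assuming empty dimensions and metrics behave equivalently for this test):

sink.add(new MapBasedInputRow(
    // same timestamp as the anonymous rows above
    new DateTime("2013-01-01"),
    // no dimensions
    ImmutableList.<String>of(),
    // no metric values
    ImmutableMap.<String, Object>of()));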
Also used: Period (org.joda.time.Period) RealtimeTuningConfig (io.druid.segment.indexing.RealtimeTuningConfig) DateTime (org.joda.time.DateTime) DataSchema (io.druid.segment.indexing.DataSchema) UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec) CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory) InputRow (io.druid.data.input.InputRow) List (java.util.List) DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper) Row (io.druid.data.input.Row) FireHydrant (io.druid.segment.realtime.FireHydrant) Interval (org.joda.time.Interval) Test (org.junit.Test)

Example 8 with FireHydrant

Use of io.druid.segment.realtime.FireHydrant in project druid by druid-io.

The class RealtimePlumberSchoolTest, method testPersistHydrantGapsHelper.

private void testPersistHydrantGapsHelper(final Object commitMetadata) throws Exception {
    Interval testInterval = new Interval(new DateTime("1970-01-01"), new DateTime("1971-01-01"));
    RealtimePlumber plumber2 = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
    plumber2.getSinks().put(0L, new Sink(testInterval, schema2, tuningConfig.getShardSpec(), new DateTime("2014-12-01T12:34:56.789").toString(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions()));
    Assert.assertNull(plumber2.startJob());
    final CountDownLatch doneSignal = new CountDownLatch(1);
    final Committer committer = new Committer() {

        @Override
        public Object getMetadata() {
            return commitMetadata;
        }

        @Override
        public void run() {
            doneSignal.countDown();
        }
    };
    plumber2.add(getTestInputRow("1970-01-01"), Suppliers.ofInstance(committer));
    plumber2.add(getTestInputRow("1970-02-01"), Suppliers.ofInstance(committer));
    plumber2.add(getTestInputRow("1970-03-01"), Suppliers.ofInstance(committer));
    plumber2.add(getTestInputRow("1970-04-01"), Suppliers.ofInstance(committer));
    plumber2.add(getTestInputRow("1970-05-01"), Suppliers.ofInstance(committer));
    plumber2.persist(committer);
    doneSignal.await();
    plumber2.getSinks().clear();
    plumber2.finishJob();
    File persistDir = plumber2.computePersistDir(schema2, testInterval);
    /* Check that all hydrants were persisted */
    for (int i = 0; i < 5; i++) {
        Assert.assertTrue(new File(persistDir, String.valueOf(i)).exists());
    }
    /* Create some gaps in the persisted hydrants and reload */
    FileUtils.deleteDirectory(new File(persistDir, "1"));
    FileUtils.deleteDirectory(new File(persistDir, "3"));
    RealtimePlumber restoredPlumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
    restoredPlumber.bootstrapSinksFromDisk();
    Map<Long, Sink> sinks = restoredPlumber.getSinks();
    Assert.assertEquals(1, sinks.size());
    List<FireHydrant> hydrants = Lists.newArrayList(sinks.get(new Long(0)));
    DateTime startTime = new DateTime("1970-01-01T00:00:00.000Z");
    Interval expectedInterval = new Interval(startTime, new DateTime("1971-01-01T00:00:00.000Z"));
    Assert.assertEquals(0, hydrants.get(0).getCount());
    Assert.assertEquals(expectedInterval, hydrants.get(0).getSegment().getDataInterval());
    Assert.assertEquals(2, hydrants.get(1).getCount());
    Assert.assertEquals(expectedInterval, hydrants.get(1).getSegment().getDataInterval());
    Assert.assertEquals(4, hydrants.get(2).getCount());
    Assert.assertEquals(expectedInterval, hydrants.get(2).getSegment().getDataInterval());
    /* Delete all the hydrants and reload, no sink should be created */
    FileUtils.deleteDirectory(new File(persistDir, "0"));
    FileUtils.deleteDirectory(new File(persistDir, "2"));
    FileUtils.deleteDirectory(new File(persistDir, "4"));
    RealtimePlumber restoredPlumber2 = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
    restoredPlumber2.bootstrapSinksFromDisk();
    Assert.assertEquals(0, restoredPlumber2.getSinks().size());
}
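
The point of the gap checks: each hydrant keeps the index of the spill directory it was persisted to, so after deleting directories 1 and 3 the restored sink yields counts 0, 2, 4 rather than a re-numbered dense sequence. A small sketch of inspecting that, reusing restoredPlumber from above:

for (FireHydrant hydrant : restoredPlumber.getSinks().get(0L)) {
    // prints 0, 2, 4 -- the surviving spill directories, with gaps preserved
    System.out.println(hydrant.getCount());
}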
Also used: Committer (io.druid.data.input.Committer) FireHydrant (io.druid.segment.realtime.FireHydrant) CountDownLatch (java.util.concurrent.CountDownLatch) File (java.io.File) DateTime (org.joda.time.DateTime) Interval (org.joda.time.Interval)

Example 9 with FireHydrant

Use of io.druid.segment.realtime.FireHydrant in project druid by druid-io.

The class Sink, method makeNewCurrIndex.

private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema) {
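    // Derive the in-memory index schema from the sink's data schema: timestamp and
    // dimension specs from the parser, plus query granularity, aggregators, and rollup.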
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder().withMinTimestamp(minTimestamp).withTimestampSpec(schema.getParser()).withQueryGranularity(schema.getGranularitySpec().getQueryGranularity()).withDimensionsSpec(schema.getParser()).withMetrics(schema.getAggregators()).withRollup(schema.getGranularitySpec().isRollup()).build();
    final IncrementalIndex newIndex = new OnheapIncrementalIndex(indexSchema, reportParseExceptions, maxRowsInMemory);
    final FireHydrant old;
    synchronized (hydrantLock) {
        if (writable) {
            old = currHydrant;
            int newCount = 0;
            int numHydrants = hydrants.size();
            if (numHydrants > 0) {
                FireHydrant lastHydrant = hydrants.get(numHydrants - 1);
                newCount = lastHydrant.getCount() + 1;
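                // With no explicit dimension spec, inherit the previous hydrant's dimension
                // order and column capabilities so ordering stays consistent across hydrants.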
                if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) {
                    Map<String, ColumnCapabilitiesImpl> oldCapabilities;
                    if (lastHydrant.hasSwapped()) {
                        oldCapabilities = Maps.newHashMap();
                        QueryableIndex oldIndex = lastHydrant.getSegment().asQueryableIndex();
                        for (String dim : oldIndex.getAvailableDimensions()) {
                            dimOrder.add(dim);
                            oldCapabilities.put(dim, (ColumnCapabilitiesImpl) oldIndex.getColumn(dim).getCapabilities());
                        }
                    } else {
                        IncrementalIndex oldIndex = lastHydrant.getIndex();
                        dimOrder.addAll(oldIndex.getDimensionOrder());
                        oldCapabilities = oldIndex.getColumnCapabilities();
                    }
                    newIndex.loadDimensionIterable(dimOrder, oldCapabilities);
                }
            }
            currHydrant = new FireHydrant(newIndex, newCount, getSegment().getIdentifier());
            if (old != null) {
                numRowsExcludingCurrIndex.addAndGet(old.getIndex().size());
            }
            hydrants.add(currHydrant);
        } else {
            // Oops, someone called finishWriting while we were making this new index.
            newIndex.close();
            throw new ISE("finishWriting() called during swap");
        }
    }
    return old;
}
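
This method is what backs Sink.swap(): the old hydrant comes back for persisting while a fresh one, with an incremented count, becomes current. A minimal sketch of that contract, mirroring testSwap above and assuming the sink is still writable:

FireHydrant before = sink.getCurrHydrant();
// swap() makes a new current index via makeNewCurrIndex(...) and returns the old one
FireHydrant swapped = sink.swap();
Assert.assertEquals(before, swapped);
Assert.assertNotSame(before, sink.getCurrHydrant());
Assert.assertEquals(before.getCount() + 1, sink.getCurrHydrant().getCount());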
Also used: IncrementalIndex (io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex) QueryableIndex (io.druid.segment.QueryableIndex) ISE (io.druid.java.util.common.ISE) FireHydrant (io.druid.segment.realtime.FireHydrant) IncrementalIndexSchema (io.druid.segment.incremental.IncrementalIndexSchema) ColumnCapabilitiesImpl (io.druid.segment.column.ColumnCapabilitiesImpl)

Example 10 with FireHydrant

Use of io.druid.segment.realtime.FireHydrant in project druid by druid-io.

The class RealtimePlumber, method persistAndMerge.

// Submits persist-n-merge task for a Sink to the mergeExecutor
private void persistAndMerge(final long truncatedTime, final Sink sink) {
    final String threadName = String.format("%s-%s-persist-n-merge", schema.getDataSource(), new DateTime(truncatedTime));
    mergeExecutor.execute(new ThreadRenamingRunnable(threadName) {

        final Interval interval = sink.getInterval();

        Stopwatch mergeStopwatch = null;

        @Override
        public void doRun() {
            try {
                // Bail out if this sink has been abandoned by a previously-executed task.
                if (sinks.get(truncatedTime) != sink) {
                    log.info("Sink[%s] was abandoned, bailing out of persist-n-merge.", sink);
                    return;
                }
                // Use a file to indicate that pushing has completed.
                final File persistDir = computePersistDir(schema, interval);
                final File mergedTarget = new File(persistDir, "merged");
                final File isPushedMarker = new File(persistDir, "isPushedMarker");
                if (!isPushedMarker.exists()) {
                    removeSegment(sink, mergedTarget);
                    if (mergedTarget.exists()) {
                        log.wtf("Merged target[%s] exists?!", mergedTarget);
                        return;
                    }
                } else {
                    log.info("Already pushed sink[%s]", sink);
                    return;
                }
                /*
                 * Note: if the plumber crashes after persisting a subset of hydrants, data may be duplicated,
                 * because those hydrants will be read back while the older commitMetadata is used. Fixing this
                 * probably requires structural changes to the plumber.
                 */
                for (FireHydrant hydrant : sink) {
                    synchronized (hydrant) {
                        if (!hydrant.hasSwapped()) {
                            log.info("Hydrant[%s] hasn't swapped yet, swapping. Sink[%s]", hydrant, sink);
                            final int rowCount = persistHydrant(hydrant, schema, interval, null);
                            metrics.incrementRowOutputCount(rowCount);
                        }
                    }
                }
                final long mergeThreadCpuTime = VMUtils.safeGetThreadCpuTime();
                mergeStopwatch = Stopwatch.createStarted();
                List<QueryableIndex> indexes = Lists.newArrayList();
                for (FireHydrant fireHydrant : sink) {
                    Segment segment = fireHydrant.getSegment();
                    final QueryableIndex queryableIndex = segment.asQueryableIndex();
                    log.info("Adding hydrant[%s]", fireHydrant);
                    indexes.add(queryableIndex);
                }
                final File mergedFile = indexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), mergedTarget, config.getIndexSpec());
                // emit merge metrics before publishing segment
                metrics.incrementMergeCpuTime(VMUtils.safeGetThreadCpuTime() - mergeThreadCpuTime);
                metrics.incrementMergeTimeMillis(mergeStopwatch.elapsed(TimeUnit.MILLISECONDS));
                QueryableIndex index = indexIO.loadIndex(mergedFile);
                log.info("Pushing [%s] to deep storage", sink.getSegment().getIdentifier());
                DataSegment segment = dataSegmentPusher.push(mergedFile, sink.getSegment().withDimensions(Lists.newArrayList(index.getAvailableDimensions())));
                log.info("Inserting [%s] to the metadata store", sink.getSegment().getIdentifier());
                segmentPublisher.publishSegment(segment);
                if (!isPushedMarker.createNewFile()) {
                    log.makeAlert("Failed to create marker file for [%s]", schema.getDataSource()).addData("interval", sink.getInterval()).addData("partitionNum", segment.getShardSpec().getPartitionNum()).addData("marker", isPushedMarker).emit();
                }
            } catch (Exception e) {
                metrics.incrementFailedHandoffs();
                log.makeAlert(e, "Failed to persist merged index[%s]", schema.getDataSource()).addData("interval", interval).emit();
                if (shuttingDown) {
                    // We're trying to shut down, and this segment failed to push. Let's just get rid of it.
                    // This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
                    cleanShutdown = false;
                    abandonSegment(truncatedTime, sink);
                }
            } finally {
                if (mergeStopwatch != null) {
                    mergeStopwatch.stop();
                }
            }
        }
    });
    handoffNotifier.registerSegmentHandoffCallback(new SegmentDescriptor(sink.getInterval(), sink.getVersion(), config.getShardSpec().getPartitionNum()), mergeExecutor, new Runnable() {

        @Override
        public void run() {
            abandonSegment(sink.getInterval().getStartMillis(), sink);
            metrics.incrementHandOffCount();
        }
    });
}
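
The isPushedMarker trick generalizes: an empty file written only after a successful push makes the whole persist-n-merge task safely re-runnable. The pattern in isolation, as a hedged sketch using plain java.io (doMergeAndPush is a hypothetical stand-in for the merge/push body above, and the enclosing method is assumed to declare throws IOException):

File isPushedMarker = new File(persistDir, "isPushedMarker");
if (isPushedMarker.exists()) {
    // A previous run already pushed this sink; skip the merge entirely.
    return;
}
doMergeAndPush();
if (!isPushedMarker.createNewFile()) {
    // The push succeeded but the marker could not be written; a restart would redo
    // the merge and push, so this is worth alerting on.
}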
Also used: Stopwatch (com.google.common.base.Stopwatch) DataSegment (io.druid.timeline.DataSegment) DateTime (org.joda.time.DateTime) QueryableIndexSegment (io.druid.segment.QueryableIndexSegment) Segment (io.druid.segment.Segment) IndexSizeExceededException (io.druid.segment.incremental.IndexSizeExceededException) IOException (java.io.IOException) QueryableIndex (io.druid.segment.QueryableIndex) SegmentDescriptor (io.druid.query.SegmentDescriptor) ThreadRenamingRunnable (io.druid.common.guava.ThreadRenamingRunnable) List (java.util.List) FireHydrant (io.druid.segment.realtime.FireHydrant) File (java.io.File) Interval (org.joda.time.Interval)

Aggregations

FireHydrant (io.druid.segment.realtime.FireHydrant): 12
IndexSizeExceededException (io.druid.segment.incremental.IndexSizeExceededException): 7
IOException (java.io.IOException): 7
QueryableIndex (io.druid.segment.QueryableIndex): 6
File (java.io.File): 6
ISE (io.druid.java.util.common.ISE): 4
QueryableIndexSegment (io.druid.segment.QueryableIndexSegment): 4
List (java.util.List): 4
Interval (org.joda.time.Interval): 4
Function (com.google.common.base.Function): 3
Committer (io.druid.data.input.Committer): 3
Sink (io.druid.segment.realtime.plumber.Sink): 3
ExecutionException (java.util.concurrent.ExecutionException): 3
DateTime (org.joda.time.DateTime): 3
Stopwatch (com.google.common.base.Stopwatch): 2
ImmutableList (com.google.common.collect.ImmutableList): 2
InputRow (io.druid.data.input.InputRow): 2
Pair (io.druid.java.util.common.Pair): 2
Query (io.druid.query.Query): 2
SegmentDescriptor (io.druid.query.SegmentDescriptor): 2