Example 11 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class SQLMetadataSegmentManager, method enableDatasource.

@Override
public boolean enableDatasource(final String ds) {
    try {
        final IDBI dbi = connector.getDBI();
        VersionedIntervalTimeline<String, DataSegment> segmentTimeline = connector.inReadOnlyTransaction(new TransactionCallback<VersionedIntervalTimeline<String, DataSegment>>() {

            @Override
            public VersionedIntervalTimeline<String, DataSegment> inTransaction(Handle handle, TransactionStatus status) throws Exception {
                return handle
                    .createQuery(String.format("SELECT payload FROM %s WHERE dataSource = :dataSource", getSegmentsTable()))
                    .setFetchSize(connector.getStreamingFetchSize())
                    .bind("dataSource", ds)
                    .map(ByteArrayMapper.FIRST)
                    .fold(new VersionedIntervalTimeline<String, DataSegment>(Ordering.natural()), new Folder3<VersionedIntervalTimeline<String, DataSegment>, byte[]>() {

                    @Override
                    public VersionedIntervalTimeline<String, DataSegment> fold(VersionedIntervalTimeline<String, DataSegment> timeline, byte[] payload, FoldController foldController, StatementContext statementContext) throws SQLException {
                        try {
                            final DataSegment segment = DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue(payload, DataSegment.class));
                            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
                            return timeline;
                        } catch (Exception e) {
                            throw new SQLException(e.toString());
                        }
                    }
                });
            }
        });
        final List<DataSegment> segments = Lists.newArrayList();
        for (TimelineObjectHolder<String, DataSegment> objectHolder : segmentTimeline.lookup(new Interval("0000-01-01/3000-01-01"))) {
            for (PartitionChunk<DataSegment> partitionChunk : objectHolder.getObject()) {
                segments.add(partitionChunk.getObject());
            }
        }
        if (segments.isEmpty()) {
            log.warn("No segments found in the database!");
            return false;
        }
        dbi.withHandle(new HandleCallback<Void>() {

            @Override
            public Void withHandle(Handle handle) throws Exception {
                Batch batch = handle.createBatch();
                for (DataSegment segment : segments) {
                    batch.add(String.format("UPDATE %s SET used=true WHERE id = '%s'", getSegmentsTable(), segment.getIdentifier()));
                }
                batch.execute();
                return null;
            }
        });
    } catch (Exception e) {
        log.error(e, "Exception enabling datasource %s", ds);
        return false;
    }
    return true;
}
Also used : IDBI(org.skife.jdbi.v2.IDBI) SQLException(java.sql.SQLException) TransactionStatus(org.skife.jdbi.v2.TransactionStatus) DataSegment(io.druid.timeline.DataSegment) IOException(java.io.IOException) Handle(org.skife.jdbi.v2.Handle) StatementContext(org.skife.jdbi.v2.StatementContext) FoldController(org.skife.jdbi.v2.FoldController) Batch(org.skife.jdbi.v2.Batch) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) Folder3(org.skife.jdbi.v2.Folder3) Interval(org.joda.time.Interval)
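
The heart of this example is the round trip from serialized segment rows into a VersionedIntervalTimeline and back into a flat list, with the timeline resolving which versions are visible. Below is a minimal sketch of that round trip using only the timeline calls visible above; the class and method names are made up for illustration, and the caller is assumed to supply already-deserialized segments.

import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import io.druid.timeline.DataSegment;
import io.druid.timeline.TimelineObjectHolder;
import io.druid.timeline.VersionedIntervalTimeline;
import io.druid.timeline.partition.PartitionChunk;
import org.joda.time.Interval;

import java.util.List;

public class TimelineRoundTrip {

    // Hypothetical helper mirroring enableDatasource(): load every segment into a
    // versioned timeline, then flatten the visible (non-overshadowed) chunks back
    // into a plain list for the given interval.
    public static List<DataSegment> visibleSegments(Iterable<DataSegment> allSegments, Interval interval) {
        final VersionedIntervalTimeline<String, DataSegment> timeline =
            new VersionedIntervalTimeline<String, DataSegment>(Ordering.<String>natural());
        for (DataSegment segment : allSegments) {
            // Each segment is keyed by its interval and version; the shard spec
            // decides how it is chunked within that interval.
            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
        }
        final List<DataSegment> visible = Lists.newArrayList();
        for (TimelineObjectHolder<String, DataSegment> holder : timeline.lookup(interval)) {
            for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
                visible.add(chunk.getObject());
            }
        }
        return visible;
    }
}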

Example 12 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class SQLMetadataSegmentManager, method poll.

@Override
public void poll() {
    try {
        if (!started) {
            return;
        }
        ConcurrentHashMap<String, DruidDataSource> newDataSources = new ConcurrentHashMap<String, DruidDataSource>();
        log.debug("Starting polling of segment table");
        // some databases such as PostgreSQL require auto-commit turned off
        // to stream results back, enabling transactions disables auto-commit
        //
        // setting connection to read-only will allow some database such as MySQL
        // to automatically use read-only transaction mode, further optimizing the query
        final List<DataSegment> segments = connector.inReadOnlyTransaction(new TransactionCallback<List<DataSegment>>() {

            @Override
            public List<DataSegment> inTransaction(Handle handle, TransactionStatus status) throws Exception {
                return handle
                    .createQuery(String.format("SELECT payload FROM %s WHERE used=true", getSegmentsTable()))
                    .setFetchSize(connector.getStreamingFetchSize())
                    .map(new ResultSetMapper<DataSegment>() {

                    @Override
                    public DataSegment map(int index, ResultSet r, StatementContext ctx) throws SQLException {
                        try {
                            return DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue(r.getBytes("payload"), DataSegment.class));
                        } catch (IOException e) {
                            log.makeAlert(e, "Failed to read segment from db.").emit();
                            return null;
                        }
                    }
                }).list();
            }
        });
        if (segments == null || segments.isEmpty()) {
            log.warn("No segments found in the database!");
            return;
        }
        final Collection<DataSegment> segmentsFinal = Collections2.filter(segments, Predicates.notNull());
        log.info("Polled and found %,d segments in the database", segments.size());
        for (final DataSegment segment : segmentsFinal) {
            String datasourceName = segment.getDataSource();
            DruidDataSource dataSource = newDataSources.get(datasourceName);
            if (dataSource == null) {
                dataSource = new DruidDataSource(datasourceName, ImmutableMap.of("created", new DateTime().toString()));
                Object shouldBeNull = newDataSources.put(datasourceName, dataSource);
                if (shouldBeNull != null) {
                    log.warn("Just put key[%s] into dataSources and what was there wasn't null!?  It was[%s]", datasourceName, shouldBeNull);
                }
            }
            if (!dataSource.getSegments().contains(segment)) {
                dataSource.addSegment(segment.getIdentifier(), segment);
            }
        }
        synchronized (lock) {
            if (started) {
                dataSources.set(newDataSources);
            }
        }
    } catch (Exception e) {
        log.makeAlert(e, "Problem polling DB.").emit();
    }
}
Also used : SQLException(java.sql.SQLException) TransactionStatus(org.skife.jdbi.v2.TransactionStatus) IOException(java.io.IOException) DruidDataSource(io.druid.client.DruidDataSource) DataSegment(io.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) Handle(org.skife.jdbi.v2.Handle) StatementContext(org.skife.jdbi.v2.StatementContext) ResultSet(java.sql.ResultSet) List(java.util.List) ArrayList(java.util.ArrayList) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)
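
One detail worth calling out is the DATA_SEGMENT_INTERNER used in both this example and the previous one: repeated polls would otherwise build fresh, equal DataSegment instances every time. The sketch below shows a plausible shape for that deserialize-and-intern step; the interner setup and class name are assumptions, since the original only shows the field being used.

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Interner;
import com.google.common.collect.Interners;
import io.druid.timeline.DataSegment;

import java.io.IOException;

public class SegmentPayloads {

    // A weak interner lets every poll that sees the same payload share one
    // DataSegment instance instead of keeping many equal copies on the heap.
    // (Assumed setup; the original hides this behind DATA_SEGMENT_INTERNER.)
    private static final Interner<DataSegment> SEGMENT_INTERNER = Interners.newWeakInterner();

    // Deserialize a payload column exactly as the ResultSetMapper above does,
    // then canonicalize it through the interner.
    public static DataSegment fromPayload(ObjectMapper jsonMapper, byte[] payload) throws IOException {
        return SEGMENT_INTERNER.intern(jsonMapper.readValue(payload, DataSegment.class));
    }
}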

Example 13 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class AppenderatorPlumber, method mergeAndPush.

private void mergeAndPush() {
    final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity();
    final Period windowPeriod = config.getWindowPeriod();
    final long windowMillis = windowPeriod.toStandardDuration().getMillis();
    log.info("Starting merge and push.");
    DateTime minTimestampAsDate = segmentGranularity.bucketStart(new DateTime(Math.max(windowMillis, rejectionPolicy.getCurrMaxTime().getMillis()) - windowMillis));
    long minTimestamp = minTimestampAsDate.getMillis();
    final List<SegmentIdentifier> appenderatorSegments = appenderator.getSegments();
    final List<SegmentIdentifier> segmentsToPush = Lists.newArrayList();
    if (shuttingDown) {
        log.info("Found [%,d] segments. Attempting to hand off all of them.", appenderatorSegments.size());
        segmentsToPush.addAll(appenderatorSegments);
    } else {
        log.info("Found [%,d] segments. Attempting to hand off segments that start before [%s].", appenderatorSegments.size(), minTimestampAsDate);
        for (SegmentIdentifier segment : appenderatorSegments) {
            final Long intervalStart = segment.getInterval().getStartMillis();
            if (intervalStart < minTimestamp) {
                log.info("Adding entry [%s] for merge and push.", segment);
                segmentsToPush.add(segment);
            } else {
                log.info("Skipping persist and merge for entry [%s] : Start time [%s] >= [%s] min timestamp required in this run. Segment will be picked up in a future run.", segment, new DateTime(intervalStart), minTimestampAsDate);
            }
        }
    }
    log.info("Found [%,d] segments to persist and merge", segmentsToPush.size());
    final Function<Throwable, Void> errorHandler = new Function<Throwable, Void>() {

        @Override
        public Void apply(Throwable throwable) {
            final List<String> segmentIdentifierStrings = Lists.transform(segmentsToPush, new Function<SegmentIdentifier, String>() {

                @Override
                public String apply(SegmentIdentifier input) {
                    return input.getIdentifierAsString();
                }
            });
            log.makeAlert(throwable, "Failed to publish merged indexes[%s]", schema.getDataSource()).addData("segments", segmentIdentifierStrings).emit();
            if (shuttingDown) {
                // We're trying to shut down, and these segments failed to push. Let's just get rid of them.
                // This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
                cleanShutdown = false;
                for (SegmentIdentifier identifier : segmentsToPush) {
                    dropSegment(identifier);
                }
            }
            return null;
        }
    };
    // WARNING: Committers.nil() here means that on-disk data can get out of sync with committing.
    Futures.addCallback(appenderator.push(segmentsToPush, Committers.nil()), new FutureCallback<SegmentsAndMetadata>() {

        @Override
        public void onSuccess(SegmentsAndMetadata result) {
            // Immediately publish after pushing
            for (DataSegment pushedSegment : result.getSegments()) {
                try {
                    segmentPublisher.publishSegment(pushedSegment);
                } catch (Exception e) {
                    errorHandler.apply(e);
                }
            }
            log.info("Published [%,d] sinks.", segmentsToPush.size());
        }

        @Override
        public void onFailure(Throwable e) {
            log.warn(e, "Failed to push [%,d] segments.", segmentsToPush.size());
            errorHandler.apply(e);
        }
    });
}
Also used : Period(org.joda.time.Period) Granularity(io.druid.java.util.common.granularity.Granularity) DataSegment(io.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) Function(com.google.common.base.Function)
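
The one subtle piece of mergeAndPush() is the hand-off cutoff: a segment is pushed once its interval starts before the rejection policy's current max time minus the window period, snapped down to a segment-granularity boundary by bucketStart(). A small sketch of that arithmetic with made-up values for the window and the max time:

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;

public class HandoffCutoff {

    public static void main(String[] args) {
        // Hypothetical stand-ins for config.getWindowPeriod() and
        // rejectionPolicy.getCurrMaxTime().
        final Period windowPeriod = new Period("PT10M");
        final long windowMillis = windowPeriod.toStandardDuration().getMillis();
        final DateTime currMaxTime = new DateTime("2017-01-01T12:34:00Z");

        // Segments whose interval starts before this instant are eligible for merge
        // and push; segmentGranularity.bucketStart() in the example then truncates
        // it to the start of the granularity bucket (e.g. the top of the hour).
        final DateTime cutoff = new DateTime(
            Math.max(windowMillis, currMaxTime.getMillis()) - windowMillis,
            DateTimeZone.UTC
        );
        System.out.println("Hand off segments starting before " + cutoff); // 2017-01-01T12:24:00.000Z
    }
}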

Example 14 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class FiniteAppenderatorDriver, method publishAll.

/**
   * Push and publish all segments to the metadata store.
   *
   * @param publisher        segment publisher
   * @param wrappedCommitter wrapped committer (from wrapCommitter)
   *
   * @return published segments and metadata, or null if segments could not be published due to transaction failure
   * with commit metadata.
   */
private SegmentsAndMetadata publishAll(final TransactionalSegmentPublisher publisher, final Committer wrappedCommitter) throws InterruptedException {
    final List<SegmentIdentifier> theSegments = ImmutableList.copyOf(appenderator.getSegments());
    long nTry = 0;
    while (true) {
        try {
            log.info("Pushing segments: [%s]", Joiner.on(", ").join(theSegments));
            final SegmentsAndMetadata segmentsAndMetadata = appenderator.push(theSegments, wrappedCommitter).get();
            // Sanity check
            if (!segmentsToIdentifiers(segmentsAndMetadata.getSegments()).equals(Sets.newHashSet(theSegments))) {
                throw new ISE("WTF?! Pushed different segments than requested. Pushed[%s], requested[%s].", Joiner.on(", ").join(identifiersToStrings(segmentsToIdentifiers(segmentsAndMetadata.getSegments()))), Joiner.on(", ").join(identifiersToStrings(theSegments)));
            }
            log.info("Publishing segments with commitMetadata[%s]: [%s]", segmentsAndMetadata.getCommitMetadata(), Joiner.on(", ").join(segmentsAndMetadata.getSegments()));
            if (segmentsAndMetadata.getSegments().isEmpty()) {
                log.info("Nothing to publish, skipping publish step.");
            } else {
                final boolean published = publisher.publishSegments(ImmutableSet.copyOf(segmentsAndMetadata.getSegments()), ((FiniteAppenderatorDriverMetadata) segmentsAndMetadata.getCommitMetadata()).getCallerMetadata());
                if (published) {
                    log.info("Published segments, awaiting handoff.");
                } else {
                    log.info("Transaction failure while publishing segments, checking if someone else beat us to it.");
                    if (usedSegmentChecker.findUsedSegments(segmentsToIdentifiers(segmentsAndMetadata.getSegments())).equals(Sets.newHashSet(segmentsAndMetadata.getSegments()))) {
                        log.info("Our segments really do exist, awaiting handoff.");
                    } else {
                        log.warn("Our segments don't exist, giving up.");
                        return null;
                    }
                }
            }
            for (final DataSegment dataSegment : segmentsAndMetadata.getSegments()) {
                handoffNotifier.registerSegmentHandoffCallback(new SegmentDescriptor(dataSegment.getInterval(), dataSegment.getVersion(), dataSegment.getShardSpec().getPartitionNum()), MoreExecutors.sameThreadExecutor(), new Runnable() {

                    @Override
                    public void run() {
                        final SegmentIdentifier identifier = SegmentIdentifier.fromDataSegment(dataSegment);
                        log.info("Segment[%s] successfully handed off, dropping.", identifier);
                        metrics.incrementHandOffCount();
                        final ListenableFuture<?> dropFuture = appenderator.drop(identifier);
                        Futures.addCallback(dropFuture, new FutureCallback<Object>() {

                            @Override
                            public void onSuccess(Object result) {
                                synchronized (handoffMonitor) {
                                    handoffMonitor.notifyAll();
                                }
                            }

                            @Override
                            public void onFailure(Throwable e) {
                                log.warn(e, "Failed to drop segment[%s]?!");
                                synchronized (handoffMonitor) {
                                    handoffMonitor.notifyAll();
                                }
                            }
                        });
                    }
                });
            }
            return segmentsAndMetadata;
        } catch (InterruptedException e) {
            throw e;
        } catch (Exception e) {
            final long sleepMillis = computeNextRetrySleep(++nTry);
            log.warn(e, "Failed publishAll (try %d), retrying in %,dms.", nTry, sleepMillis);
            Thread.sleep(sleepMillis);
        }
    }
}
Also used : DataSegment(io.druid.timeline.DataSegment) IOException(java.io.IOException) SegmentDescriptor(io.druid.query.SegmentDescriptor) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) ISE(io.druid.java.util.common.ISE) FutureCallback(com.google.common.util.concurrent.FutureCallback)
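
The handoff registration in the loop above reduces to deriving two keys from each pushed DataSegment: a SegmentDescriptor for the handoff notifier and a SegmentIdentifier for the eventual drop. A minimal sketch of just that conversion; the helper class is hypothetical, and the SegmentIdentifier import assumes it lives in the same appenderator package as this driver.

import io.druid.query.SegmentDescriptor;
import io.druid.segment.realtime.appenderator.SegmentIdentifier;
import io.druid.timeline.DataSegment;

public class HandoffKeys {

    // The (interval, version, partition) triple is what handoff callbacks are
    // keyed on.
    public static SegmentDescriptor descriptorFor(DataSegment segment) {
        return new SegmentDescriptor(
            segment.getInterval(),
            segment.getVersion(),
            segment.getShardSpec().getPartitionNum()
        );
    }

    // The identifier is what the appenderator uses to drop its local copy once
    // handoff has completed.
    public static SegmentIdentifier identifierFor(DataSegment segment) {
        return SegmentIdentifier.fromDataSegment(segment);
    }
}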

Example 15 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class AppenderatorImpl, method mergeAndPush.

/**
   * Merge segment, push to deep storage. Should only be used on segments that have been fully persisted. Must only
   * be run in the single-threaded pushExecutor.
   *
   * @param identifier sink identifier
   * @param sink       sink to push
   *
   * @return segment descriptor, or null if the sink is no longer valid
   */
private DataSegment mergeAndPush(final SegmentIdentifier identifier, final Sink sink) {
    // Bail out if this sink is null or otherwise not what we expect.
    if (sinks.get(identifier) != sink) {
        log.warn("Sink for segment[%s] no longer valid, bailing out of mergeAndPush.", identifier);
        return null;
    }
    // Use a descriptor file to indicate that pushing has completed.
    final File persistDir = computePersistDir(identifier);
    final File mergedTarget = new File(persistDir, "merged");
    final File descriptorFile = computeDescriptorFile(identifier);
    // Sanity checks
    for (FireHydrant hydrant : sink) {
        if (sink.isWritable()) {
            throw new ISE("WTF?! Expected sink to be no longer writable before mergeAndPush. Segment[%s].", identifier);
        }
        synchronized (hydrant) {
            if (!hydrant.hasSwapped()) {
                throw new ISE("WTF?! Expected sink to be fully persisted before mergeAndPush. Segment[%s].", identifier);
            }
        }
    }
    try {
        if (descriptorFile.exists()) {
            // Already pushed.
            log.info("Segment[%s] already pushed.", identifier);
            return objectMapper.readValue(descriptorFile, DataSegment.class);
        }
        log.info("Pushing merged index for segment[%s].", identifier);
        removeDirectory(mergedTarget);
        if (mergedTarget.exists()) {
            throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
        }
        List<QueryableIndex> indexes = Lists.newArrayList();
        for (FireHydrant fireHydrant : sink) {
            Segment segment = fireHydrant.getSegment();
            final QueryableIndex queryableIndex = segment.asQueryableIndex();
            log.info("Adding hydrant[%s]", fireHydrant);
            indexes.add(queryableIndex);
        }
        final File mergedFile;
        mergedFile = indexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), mergedTarget, tuningConfig.getIndexSpec());
        QueryableIndex index = indexIO.loadIndex(mergedFile);
        DataSegment segment = dataSegmentPusher.push(mergedFile, sink.getSegment().withDimensions(Lists.newArrayList(index.getAvailableDimensions())));
        objectMapper.writeValue(descriptorFile, segment);
        log.info("Pushed merged index for segment[%s], descriptor is: %s", identifier, segment);
        return segment;
    } catch (Exception e) {
        metrics.incrementFailedHandoffs();
        log.warn(e, "Failed to push merged index for segment[%s].", identifier);
        throw Throwables.propagate(e);
    }
}
Also used : QueryableIndex(io.druid.segment.QueryableIndex) ISE(io.druid.java.util.common.ISE) FireHydrant(io.druid.segment.realtime.FireHydrant) File(java.io.File) DataSegment(io.druid.timeline.DataSegment) QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) Segment(io.druid.segment.Segment) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)
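
The descriptor file is what makes mergeAndPush() safe to retry: if it already exists, the segment was pushed on a previous attempt and its DataSegment can simply be re-read instead of merged and pushed again. A stripped-down sketch of that idempotency check, with hypothetical helper names and the file locations left to the caller:

import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.timeline.DataSegment;

import java.io.File;
import java.io.IOException;

public class DescriptorFiles {

    // Returns the previously pushed segment if a descriptor exists, otherwise null
    // so the caller knows it still has to merge and push.
    public static DataSegment alreadyPushed(ObjectMapper objectMapper, File descriptorFile) throws IOException {
        return descriptorFile.exists() ? objectMapper.readValue(descriptorFile, DataSegment.class) : null;
    }

    // Written after a successful push so that the next attempt short-circuits.
    public static void markPushed(ObjectMapper objectMapper, File descriptorFile, DataSegment segment) throws IOException {
        objectMapper.writeValue(descriptorFile, segment);
    }
}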

Aggregations

DataSegment (io.druid.timeline.DataSegment): 296 usages
Test (org.junit.Test): 154 usages
Interval (org.joda.time.Interval): 136 usages
File (java.io.File): 56 usages
DateTime (org.joda.time.DateTime): 53 usages
IOException (java.io.IOException): 37 usages
DruidServer (io.druid.client.DruidServer): 36 usages
Map (java.util.Map): 35 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 20 usages
DruidDataSource (io.druid.client.DruidDataSource): 19 usages
ListeningExecutorService (com.google.common.util.concurrent.ListeningExecutorService): 18 usages
List (java.util.List): 18 usages
DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper): 16 usages
Rule (io.druid.server.coordinator.rules.Rule): 16 usages
ForeverLoadRule (io.druid.server.coordinator.rules.ForeverLoadRule): 14 usages
IntervalDropRule (io.druid.server.coordinator.rules.IntervalDropRule): 13 usages
IntervalLoadRule (io.druid.server.coordinator.rules.IntervalLoadRule): 13 usages
GET (javax.ws.rs.GET): 13 usages
Produces (javax.ws.rs.Produces): 13 usages
Path (org.apache.hadoop.fs.Path): 13 usages