
Example 91 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class IndexerSQLMetadataStorageCoordinator, method announceHistoricalSegments.

/**
   * {@inheritDoc}
   */
@Override
public SegmentPublishResult announceHistoricalSegments(
        final Set<DataSegment> segments,
        final DataSourceMetadata startMetadata,
        final DataSourceMetadata endMetadata
) throws IOException {
    if (segments.isEmpty()) {
        throw new IllegalArgumentException("segment set must not be empty");
    }
    final String dataSource = segments.iterator().next().getDataSource();
    for (DataSegment segment : segments) {
        if (!dataSource.equals(segment.getDataSource())) {
            throw new IllegalArgumentException("segments must all be from the same dataSource");
        }
    }
    if ((startMetadata == null && endMetadata != null) || (startMetadata != null && endMetadata == null)) {
        throw new IllegalArgumentException("start/end metadata pair must be either null or non-null");
    }
    // Find which segments are used (i.e. not overshadowed).
    final Set<DataSegment> usedSegments = Sets.newHashSet();
    for (TimelineObjectHolder<String, DataSegment> holder : VersionedIntervalTimeline.forSegments(segments).lookup(JodaUtils.ETERNITY)) {
        for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
            usedSegments.add(chunk.getObject());
        }
    }
    final AtomicBoolean txnFailure = new AtomicBoolean(false);
    try {
        return connector.retryTransaction(new TransactionCallback<SegmentPublishResult>() {

            @Override
            public SegmentPublishResult inTransaction(final Handle handle, final TransactionStatus transactionStatus) throws Exception {
                final Set<DataSegment> inserted = Sets.newHashSet();
                if (startMetadata != null) {
                    final DataSourceMetadataUpdateResult result = updateDataSourceMetadataWithHandle(handle, dataSource, startMetadata, endMetadata);
                    if (result != DataSourceMetadataUpdateResult.SUCCESS) {
                        transactionStatus.setRollbackOnly();
                        txnFailure.set(true);
                        if (result == DataSourceMetadataUpdateResult.FAILURE) {
                            throw new RuntimeException("Aborting transaction!");
                        } else if (result == DataSourceMetadataUpdateResult.TRY_AGAIN) {
                            throw new RetryTransactionException("Aborting transaction!");
                        }
                    }
                }
                for (final DataSegment segment : segments) {
                    if (announceHistoricalSegment(handle, segment, usedSegments.contains(segment))) {
                        inserted.add(segment);
                    }
                }
                return new SegmentPublishResult(ImmutableSet.copyOf(inserted), true);
            }
        }, 3, SQLMetadataConnector.DEFAULT_MAX_TRIES);
    } catch (CallbackFailedException e) {
        if (txnFailure.get()) {
            return new SegmentPublishResult(ImmutableSet.<DataSegment>of(), false);
        } else {
            throw e;
        }
    }
}
Also used: ImmutableSet(com.google.common.collect.ImmutableSet), Set(java.util.Set), TransactionStatus(org.skife.jdbi.v2.TransactionStatus), DataSegment(io.druid.timeline.DataSegment), SQLException(java.sql.SQLException), IOException(java.io.IOException), CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException), Handle(org.skife.jdbi.v2.Handle), AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean), SegmentPublishResult(io.druid.indexing.overlord.SegmentPublishResult)
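
A hedged caller sketch (not part of the Druid source) showing the contract the method above enforces: the two metadata arguments must be either both null (plain segment insert) or both non-null (transactional insert guarded by a compare-and-swap on the stored datasource metadata). The class SegmentPublisherSketch and the helper publishWithMetadata are invented for illustration, and the coordinator interface and package names are assumptions based on the imports listed above.

import java.io.IOException;
import java.util.Set;

import io.druid.indexing.overlord.DataSourceMetadata;
import io.druid.indexing.overlord.IndexerMetadataStorageCoordinator;
import io.druid.indexing.overlord.SegmentPublishResult;
import io.druid.timeline.DataSegment;

public class SegmentPublisherSketch {

    private final IndexerMetadataStorageCoordinator coordinator;

    public SegmentPublisherSketch(IndexerMetadataStorageCoordinator coordinator) {
        this.coordinator = coordinator;
    }

    // Either both metadata arguments are null (plain insert) or both are non-null
    // (transactional insert), matching the argument check at the top of
    // announceHistoricalSegments above.
    public SegmentPublishResult publishWithMetadata(
            Set<DataSegment> segments,          // all segments must belong to one dataSource
            DataSourceMetadata startMetadata,   // expected current metadata, or null
            DataSourceMetadata endMetadata      // metadata to install on success, or null
    ) throws IOException {
        return coordinator.announceHistoricalSegments(segments, startMetadata, endMetadata);
    }
}

A failed publish comes back as a SegmentPublishResult with an empty segment set rather than an exception, so callers can distinguish "someone else committed first" from a hard error.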

Example 92 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class SQLMetadataSegmentManager, method enableDatasource.

@Override
public boolean enableDatasource(final String ds) {
    try {
        final IDBI dbi = connector.getDBI();
        VersionedIntervalTimeline<String, DataSegment> segmentTimeline = connector.inReadOnlyTransaction(new TransactionCallback<VersionedIntervalTimeline<String, DataSegment>>() {

            @Override
            public VersionedIntervalTimeline<String, DataSegment> inTransaction(Handle handle, TransactionStatus status) throws Exception {
                return handle.createQuery(String.format("SELECT payload FROM %s WHERE dataSource = :dataSource", getSegmentsTable()))
                        .setFetchSize(connector.getStreamingFetchSize())
                        .bind("dataSource", ds)
                        .map(ByteArrayMapper.FIRST)
                        .fold(new VersionedIntervalTimeline<String, DataSegment>(Ordering.natural()), new Folder3<VersionedIntervalTimeline<String, DataSegment>, byte[]>() {

                    @Override
                    public VersionedIntervalTimeline<String, DataSegment> fold(VersionedIntervalTimeline<String, DataSegment> timeline, byte[] payload, FoldController foldController, StatementContext statementContext) throws SQLException {
                        try {
                            final DataSegment segment = DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue(payload, DataSegment.class));
                            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
                            return timeline;
                        } catch (Exception e) {
                            throw new SQLException(e.toString());
                        }
                    }
                });
            }
        });
        final List<DataSegment> segments = Lists.newArrayList();
        for (TimelineObjectHolder<String, DataSegment> objectHolder : segmentTimeline.lookup(new Interval("0000-01-01/3000-01-01"))) {
            for (PartitionChunk<DataSegment> partitionChunk : objectHolder.getObject()) {
                segments.add(partitionChunk.getObject());
            }
        }
        if (segments.isEmpty()) {
            log.warn("No segments found in the database!");
            return false;
        }
        dbi.withHandle(new HandleCallback<Void>() {

            @Override
            public Void withHandle(Handle handle) throws Exception {
                Batch batch = handle.createBatch();
                for (DataSegment segment : segments) {
                    batch.add(String.format("UPDATE %s SET used=true WHERE id = '%s'", getSegmentsTable(), segment.getIdentifier()));
                }
                batch.execute();
                return null;
            }
        });
    } catch (Exception e) {
        log.error(e, "Exception enabling datasource %s", ds);
        return false;
    }
    return true;
}
Also used: IDBI(org.skife.jdbi.v2.IDBI), SQLException(java.sql.SQLException), TransactionStatus(org.skife.jdbi.v2.TransactionStatus), DataSegment(io.druid.timeline.DataSegment), IOException(java.io.IOException), Handle(org.skife.jdbi.v2.Handle), StatementContext(org.skife.jdbi.v2.StatementContext), FoldController(org.skife.jdbi.v2.FoldController), Batch(org.skife.jdbi.v2.Batch), VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline), Folder3(org.skife.jdbi.v2.Folder3), Interval(org.joda.time.Interval)
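
Both Example 91 and Example 92 use the same pattern: load segments into a VersionedIntervalTimeline and then read back only the chunks that survive a lookup, which yields the non-overshadowed ("used") segments. A minimal sketch of that pattern, built only from the calls shown above; the helper name findUsedSegments and the class TimelineSketch are invented for illustration.

import java.util.Set;

import com.google.common.collect.Sets;
import io.druid.timeline.DataSegment;
import io.druid.timeline.TimelineObjectHolder;
import io.druid.timeline.VersionedIntervalTimeline;
import io.druid.timeline.partition.PartitionChunk;
import org.joda.time.Interval;

public class TimelineSketch {

    public static Set<DataSegment> findUsedSegments(Set<DataSegment> segments) {
        final Set<DataSegment> used = Sets.newHashSet();
        // forSegments keys the timeline by version, so newer versions overshadow older ones
        final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(segments);
        // an effectively-eternal interval, mirroring the lookup bounds used in Example 92
        for (TimelineObjectHolder<String, DataSegment> holder : timeline.lookup(new Interval("0000-01-01/3000-01-01"))) {
            for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
                used.add(chunk.getObject());
            }
        }
        return used;
    }
}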

Example 93 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class SQLMetadataSegmentManager, method poll.

@Override
public void poll() {
    try {
        if (!started) {
            return;
        }
        ConcurrentHashMap<String, DruidDataSource> newDataSources = new ConcurrentHashMap<String, DruidDataSource>();
        log.debug("Starting polling of segment table");
        // some databases such as PostgreSQL require auto-commit turned off
        // to stream results back, enabling transactions disables auto-commit
        //
        // setting connection to read-only will allow some database such as MySQL
        // to automatically use read-only transaction mode, further optimizing the query
        final List<DataSegment> segments = connector.inReadOnlyTransaction(new TransactionCallback<List<DataSegment>>() {

            @Override
            public List<DataSegment> inTransaction(Handle handle, TransactionStatus status) throws Exception {
                return handle.createQuery(String.format("SELECT payload FROM %s WHERE used=true", getSegmentsTable()))
                        .setFetchSize(connector.getStreamingFetchSize())
                        .map(new ResultSetMapper<DataSegment>() {

                    @Override
                    public DataSegment map(int index, ResultSet r, StatementContext ctx) throws SQLException {
                        try {
                            return DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue(r.getBytes("payload"), DataSegment.class));
                        } catch (IOException e) {
                            log.makeAlert(e, "Failed to read segment from db.");
                            return null;
                        }
                    }
                }).list();
            }
        });
        if (segments == null || segments.isEmpty()) {
            log.warn("No segments found in the database!");
            return;
        }
        final Collection<DataSegment> segmentsFinal = Collections2.filter(segments, Predicates.notNull());
        log.info("Polled and found %,d segments in the database", segments.size());
        for (final DataSegment segment : segmentsFinal) {
            String datasourceName = segment.getDataSource();
            DruidDataSource dataSource = newDataSources.get(datasourceName);
            if (dataSource == null) {
                dataSource = new DruidDataSource(datasourceName, ImmutableMap.of("created", new DateTime().toString()));
                Object shouldBeNull = newDataSources.put(datasourceName, dataSource);
                if (shouldBeNull != null) {
                    log.warn("Just put key[%s] into dataSources and what was there wasn't null!?  It was[%s]", datasourceName, shouldBeNull);
                }
            }
            if (!dataSource.getSegments().contains(segment)) {
                dataSource.addSegment(segment.getIdentifier(), segment);
            }
        }
        synchronized (lock) {
            if (started) {
                dataSources.set(newDataSources);
            }
        }
    } catch (Exception e) {
        log.makeAlert(e, "Problem polling DB.").emit();
    }
}
Also used: SQLException(java.sql.SQLException), TransactionStatus(org.skife.jdbi.v2.TransactionStatus), IOException(java.io.IOException), DruidDataSource(io.druid.client.DruidDataSource), DataSegment(io.druid.timeline.DataSegment), DateTime(org.joda.time.DateTime), Handle(org.skife.jdbi.v2.Handle), StatementContext(org.skife.jdbi.v2.StatementContext), ResultSet(java.sql.ResultSet), List(java.util.List), ArrayList(java.util.ArrayList), ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)
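
After the streaming query, poll() rebuilds its datasource map by walking the polled segments, skipping nulls, and grouping by datasource name. A minimal sketch of that grouping step, using only DataSegment.getDataSource() as shown above; the class PollSketch and helper groupByDataSource are invented for illustration and are not part of Druid.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import io.druid.timeline.DataSegment;

public class PollSketch {

    public static Map<String, List<DataSegment>> groupByDataSource(Iterable<DataSegment> polled) {
        final Map<String, List<DataSegment>> byDataSource = new HashMap<>();
        for (DataSegment segment : polled) {
            // the row mapper above returns null for unreadable payloads; skip them,
            // just as poll() does with its Predicates.notNull() filter
            if (segment == null) {
                continue;
            }
            byDataSource.computeIfAbsent(segment.getDataSource(), ds -> new ArrayList<>()).add(segment);
        }
        return byDataSource;
    }
}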

Example 94 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class AppenderatorPlumber, method mergeAndPush.

private void mergeAndPush() {
    final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity();
    final Period windowPeriod = config.getWindowPeriod();
    final long windowMillis = windowPeriod.toStandardDuration().getMillis();
    log.info("Starting merge and push.");
    DateTime minTimestampAsDate = segmentGranularity.bucketStart(new DateTime(Math.max(windowMillis, rejectionPolicy.getCurrMaxTime().getMillis()) - windowMillis));
    long minTimestamp = minTimestampAsDate.getMillis();
    final List<SegmentIdentifier> appenderatorSegments = appenderator.getSegments();
    final List<SegmentIdentifier> segmentsToPush = Lists.newArrayList();
    if (shuttingDown) {
        log.info("Found [%,d] segments. Attempting to hand off all of them.", appenderatorSegments.size());
        segmentsToPush.addAll(appenderatorSegments);
    } else {
        log.info("Found [%,d] segments. Attempting to hand off segments that start before [%s].", appenderatorSegments.size(), minTimestampAsDate);
        for (SegmentIdentifier segment : appenderatorSegments) {
            final Long intervalStart = segment.getInterval().getStartMillis();
            if (intervalStart < minTimestamp) {
                log.info("Adding entry [%s] for merge and push.", segment);
                segmentsToPush.add(segment);
            } else {
                log.info("Skipping persist and merge for entry [%s] : Start time [%s] >= [%s] min timestamp required in this run. Segment will be picked up in a future run.", segment, new DateTime(intervalStart), minTimestampAsDate);
            }
        }
    }
    log.info("Found [%,d] segments to persist and merge", segmentsToPush.size());
    final Function<Throwable, Void> errorHandler = new Function<Throwable, Void>() {

        @Override
        public Void apply(Throwable throwable) {
            final List<String> segmentIdentifierStrings = Lists.transform(segmentsToPush, new Function<SegmentIdentifier, String>() {

                @Override
                public String apply(SegmentIdentifier input) {
                    return input.getIdentifierAsString();
                }
            });
            log.makeAlert(throwable, "Failed to publish merged indexes[%s]", schema.getDataSource()).addData("segments", segmentIdentifierStrings).emit();
            if (shuttingDown) {
                // We're trying to shut down, and these segments failed to push. Let's just get rid of them.
                // This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
                cleanShutdown = false;
                for (SegmentIdentifier identifier : segmentsToPush) {
                    dropSegment(identifier);
                }
            }
            return null;
        }
    };
    // WARNING: Committers.nil() here means that on-disk data can get out of sync with committing.
    Futures.addCallback(appenderator.push(segmentsToPush, Committers.nil()), new FutureCallback<SegmentsAndMetadata>() {

        @Override
        public void onSuccess(SegmentsAndMetadata result) {
            // Immediately publish after pushing
            for (DataSegment pushedSegment : result.getSegments()) {
                try {
                    segmentPublisher.publishSegment(pushedSegment);
                } catch (Exception e) {
                    errorHandler.apply(e);
                }
            }
            log.info("Published [%,d] sinks.", segmentsToPush.size());
        }

        @Override
        public void onFailure(Throwable e) {
            log.warn(e, "Failed to push [%,d] segments.", segmentsToPush.size());
            errorHandler.apply(e);
        }
    });
}
Also used: Period(org.joda.time.Period), Granularity(io.druid.java.util.common.granularity.Granularity), DataSegment(io.druid.timeline.DataSegment), DateTime(org.joda.time.DateTime), IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException), IOException(java.io.IOException), Function(com.google.common.base.Function)
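
The core of mergeAndPush() is the cutoff computation: a segment is eligible for hand-off once its interval starts before bucketStart(max(windowMillis, currMaxTime) - windowMillis). A hedged sketch that isolates just that selection logic; the class MergeAndPushSketch, the helper selectPushableSegments, and the SegmentIdentifier package name are assumptions for illustration, while bucketStart and getInterval().getStartMillis() are taken from the example above.

import java.util.ArrayList;
import java.util.List;

import io.druid.java.util.common.granularity.Granularity;
import io.druid.segment.realtime.appenderator.SegmentIdentifier;
import org.joda.time.DateTime;

public class MergeAndPushSketch {

    public static List<SegmentIdentifier> selectPushableSegments(
            List<SegmentIdentifier> candidates,
            Granularity segmentGranularity,
            long windowMillis,
            long currMaxTimeMillis
    ) {
        // cutoff bucket boundary: segments whose interval starts before it have fallen
        // outside the window and can be handed off
        final DateTime cutoff = segmentGranularity.bucketStart(
                new DateTime(Math.max(windowMillis, currMaxTimeMillis) - windowMillis)
        );
        final List<SegmentIdentifier> toPush = new ArrayList<>();
        for (SegmentIdentifier candidate : candidates) {
            if (candidate.getInterval().getStartMillis() < cutoff.getMillis()) {
                toPush.add(candidate);
            }
        }
        return toPush;
    }
}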

Example 95 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class FiniteAppenderatorDriver, method publishAll.

/**
   * Push and publish all segments to the metadata store.
   *
   * @param publisher        segment publisher
   * @param wrappedCommitter wrapped committer (from wrapCommitter)
   *
   * @return published segments and metadata, or null if segments could not be published due to transaction failure
   * with commit metadata.
   */
private SegmentsAndMetadata publishAll(final TransactionalSegmentPublisher publisher, final Committer wrappedCommitter) throws InterruptedException {
    final List<SegmentIdentifier> theSegments = ImmutableList.copyOf(appenderator.getSegments());
    long nTry = 0;
    while (true) {
        try {
            log.info("Pushing segments: [%s]", Joiner.on(", ").join(theSegments));
            final SegmentsAndMetadata segmentsAndMetadata = appenderator.push(theSegments, wrappedCommitter).get();
            // Sanity check
            if (!segmentsToIdentifiers(segmentsAndMetadata.getSegments()).equals(Sets.newHashSet(theSegments))) {
                throw new ISE("WTF?! Pushed different segments than requested. Pushed[%s], requested[%s].", Joiner.on(", ").join(identifiersToStrings(segmentsToIdentifiers(segmentsAndMetadata.getSegments()))), Joiner.on(", ").join(identifiersToStrings(theSegments)));
            }
            log.info("Publishing segments with commitMetadata[%s]: [%s]", segmentsAndMetadata.getCommitMetadata(), Joiner.on(", ").join(segmentsAndMetadata.getSegments()));
            if (segmentsAndMetadata.getSegments().isEmpty()) {
                log.info("Nothing to publish, skipping publish step.");
            } else {
                final boolean published = publisher.publishSegments(ImmutableSet.copyOf(segmentsAndMetadata.getSegments()), ((FiniteAppenderatorDriverMetadata) segmentsAndMetadata.getCommitMetadata()).getCallerMetadata());
                if (published) {
                    log.info("Published segments, awaiting handoff.");
                } else {
                    log.info("Transaction failure while publishing segments, checking if someone else beat us to it.");
                    if (usedSegmentChecker.findUsedSegments(segmentsToIdentifiers(segmentsAndMetadata.getSegments())).equals(Sets.newHashSet(segmentsAndMetadata.getSegments()))) {
                        log.info("Our segments really do exist, awaiting handoff.");
                    } else {
                        log.warn("Our segments don't exist, giving up.");
                        return null;
                    }
                }
            }
            for (final DataSegment dataSegment : segmentsAndMetadata.getSegments()) {
                handoffNotifier.registerSegmentHandoffCallback(new SegmentDescriptor(dataSegment.getInterval(), dataSegment.getVersion(), dataSegment.getShardSpec().getPartitionNum()), MoreExecutors.sameThreadExecutor(), new Runnable() {

                    @Override
                    public void run() {
                        final SegmentIdentifier identifier = SegmentIdentifier.fromDataSegment(dataSegment);
                        log.info("Segment[%s] successfully handed off, dropping.", identifier);
                        metrics.incrementHandOffCount();
                        final ListenableFuture<?> dropFuture = appenderator.drop(identifier);
                        Futures.addCallback(dropFuture, new FutureCallback<Object>() {

                            @Override
                            public void onSuccess(Object result) {
                                synchronized (handoffMonitor) {
                                    handoffMonitor.notifyAll();
                                }
                            }

                            @Override
                            public void onFailure(Throwable e) {
                                log.warn(e, "Failed to drop segment[%s]?!");
                                synchronized (handoffMonitor) {
                                    handoffMonitor.notifyAll();
                                }
                            }
                        });
                    }
                });
            }
            return segmentsAndMetadata;
        } catch (InterruptedException e) {
            throw e;
        } catch (Exception e) {
            final long sleepMillis = computeNextRetrySleep(++nTry);
            log.warn(e, "Failed publishAll (try %d), retrying in %,dms.", nTry, sleepMillis);
            Thread.sleep(sleepMillis);
        }
    }
}
Also used: DataSegment(io.druid.timeline.DataSegment), IOException(java.io.IOException), SegmentDescriptor(io.druid.query.SegmentDescriptor), ListenableFuture(com.google.common.util.concurrent.ListenableFuture), ISE(io.druid.java.util.common.ISE), FutureCallback(com.google.common.util.concurrent.FutureCallback)
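
The retry loop above sleeps for computeNextRetrySleep(++nTry) between attempts, but that method is not shown in the excerpt. Below is an assumed, typical capped exponential backoff with jitter, included only to illustrate the shape of such a function; it is not the actual Druid implementation, and the constants are arbitrary.

import java.util.concurrent.ThreadLocalRandom;

public class RetrySleepSketch {

    private static final long MIN_SLEEP_MILLIS = 1_000;
    private static final long MAX_SLEEP_MILLIS = 60_000;

    public static long computeNextRetrySleep(long nTry) {
        // exponential growth from the base sleep, capped, with a little jitter
        // so concurrent publishers do not all retry at the same instant
        final double fuzz = 1.0 + ThreadLocalRandom.current().nextDouble(0.2);
        final double exponential = MIN_SLEEP_MILLIS * Math.pow(2, Math.min(nTry, 10)) * fuzz;
        return Math.min((long) exponential, MAX_SLEEP_MILLIS);
    }
}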

Aggregations

DataSegment (io.druid.timeline.DataSegment): 293
Test (org.junit.Test): 151
Interval (org.joda.time.Interval): 136
File (java.io.File): 56
DateTime (org.joda.time.DateTime): 52
IOException (java.io.IOException): 37
DruidServer (io.druid.client.DruidServer): 36
Map (java.util.Map): 35
DruidDataSource (io.druid.client.DruidDataSource): 19
ListeningExecutorService (com.google.common.util.concurrent.ListeningExecutorService): 18
List (java.util.List): 17
DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper): 16
Rule (io.druid.server.coordinator.rules.Rule): 16
ImmutableMap (com.google.common.collect.ImmutableMap): 15
ForeverLoadRule (io.druid.server.coordinator.rules.ForeverLoadRule): 14
IntervalDropRule (io.druid.server.coordinator.rules.IntervalDropRule): 13
IntervalLoadRule (io.druid.server.coordinator.rules.IntervalLoadRule): 13
CountDownLatch (java.util.concurrent.CountDownLatch): 13
GET (javax.ws.rs.GET): 13
Produces (javax.ws.rs.Produces): 13