Examples with SegmentIdWithShardSpec - org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec

Example 11 with SegmentIdWithShardSpec

use of org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec in project druid by druid-io.

the class IndexerSQLMetadataStorageCoordinator method checkAndGetExistingSegmentId.

/**
 * Executes the given query and checks whether the pending segment it returns can be reused for the requested
 * interval.
 *
 * <p>Every pair in {@code queryVars} is bound onto the query ({@code lhs} as the name, {@code rhs} as the value)
 * before execution. Rows are mapped with {@code ByteArrayMapper.FIRST}; when at least one row comes back, the
 * payload is deserialized into a {@link SegmentIdWithShardSpec}. {@code Iterables.getOnlyElement} is used to pick
 * the payload, so the query is expected to yield at most one row.
 *
 * @param query             query to run once all variables are bound
 * @param interval          interval the caller is requesting a segment for
 * @param sequenceName      sequence name; only used in log messages here
 * @param previousSegmentId previous segment id; only used in log messages here, may be null
 * @param queryVars         variable bindings applied to {@code query}
 * @return {@code CheckExistingSegmentIdResult(false, null)} when the query returns no rows,
 * {@code CheckExistingSegmentIdResult(true, identifier)} when the stored identifier starts and ends at the same
 * millis as {@code interval}, and {@code CheckExistingSegmentIdResult(true, null)} when a row exists but its
 * interval differs
 * @throws IOException declared because of the {@code jsonMapper.readValue} call on the stored bytes
 */
private CheckExistingSegmentIdResult checkAndGetExistingSegmentId(
    final Query<Map<String, Object>> query,
    final Interval interval,
    final String sequenceName,
    @Nullable final String previousSegmentId,
    final Pair<String, String>... queryVars
) throws IOException {
    Query<Map<String, Object>> prepared = query;
    for (Pair<String, String> binding : queryVars) {
        prepared = prepared.bind(binding.lhs, binding.rhs);
    }

    final List<byte[]> rows = prepared.map(ByteArrayMapper.FIRST).list();
    if (rows.isEmpty()) {
        // Nothing stored yet for these bindings.
        return new CheckExistingSegmentIdResult(false, null);
    }

    final SegmentIdWithShardSpec stored = jsonMapper.readValue(Iterables.getOnlyElement(rows), SegmentIdWithShardSpec.class);
    final boolean intervalMatches = stored.getInterval().getStartMillis() == interval.getStartMillis()
        && stored.getInterval().getEndMillis() == interval.getEndMillis();

    if (!intervalMatches) {
        // A row exists, but it covers a different interval than the one requested.
        if (previousSegmentId == null) {
            log.warn("Cannot use existing pending segment [%s] for sequence[%s] in DB, " + "does not match requested interval[%s]", stored, sequenceName, interval);
        } else {
            log.warn("Cannot use existing pending segment [%s] for sequence[%s] (previous = [%s]) in DB, " + "does not match requested interval[%s]", stored, sequenceName, previousSegmentId, interval);
        }
        return new CheckExistingSegmentIdResult(true, null);
    }

    if (previousSegmentId == null) {
        log.info("Found existing pending segment [%s] for sequence[%s] in DB", stored, sequenceName);
    } else {
        log.info("Found existing pending segment [%s] for sequence[%s] (previous = [%s]) in DB", stored, sequenceName, previousSegmentId);
    }
    return new CheckExistingSegmentIdResult(true, stored);
}

Also used : Map(java.util.Map) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec)

Example 12 with SegmentIdWithShardSpec

use of org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec in project druid by druid-io.

the class SegmentAllocateAction method perform.

/**
 * Tries to allocate a pending segment for the row timestamp held by this action, retrying when the set of used
 * segments changes underneath it.
 *
 * <p>On each attempt the used, visible segments for the row's query-granularity bucket are fetched. If there are
 * none, a first segment is allocated for that interval; otherwise allocation is attempted against one of the
 * existing segments. When allocation yields nothing, the used segments are fetched again: if they are unchanged
 * the method gives up, and if they changed it sleeps for a short random time (50ms plus up to 450ms) and tries
 * again, for at most {@code MAX_ATTEMPTS} attempts.
 *
 * @param task    task requesting the allocation; its dataSource must equal this action's dataSource
 * @param toolbox provides the metadata storage coordinator used for segment lookups
 * @return the allocated identifier, or null when no segment could be allocated
 * @throws IAE              if the task's dataSource differs from this action's dataSource
 * @throws RuntimeException if the thread is interrupted while waiting to retry (interrupt flag is restored)
 */
@Override
public SegmentIdWithShardSpec perform(final Task task, final TaskActionToolbox toolbox) {
    for (int attempt = 1; ; attempt++) {
        if (!task.getDataSource().equals(dataSource)) {
            throw new IAE("Task dataSource must match action dataSource, [%s] != [%s].", task.getDataSource(), dataSource);
        }

        final IndexerMetadataStorageCoordinator coordinator = toolbox.getIndexerMetadataStorageCoordinator();

        // 1) if something overlaps our timestamp, use that
        // 2) otherwise try preferredSegmentGranularity & going progressively smaller
        final Interval rowInterval = queryGranularity.bucket(timestamp).withChronology(ISOChronology.getInstanceUTC());
        final Set<DataSegment> segmentsBefore = new HashSet<>(
            coordinator.retrieveUsedSegmentsForInterval(dataSource, rowInterval, Segments.ONLY_VISIBLE)
        );

        final SegmentIdWithShardSpec allocated = segmentsBefore.isEmpty()
            ? tryAllocateFirstSegment(toolbox, task, rowInterval)
            : tryAllocateSubsequentSegment(toolbox, task, rowInterval, segmentsBefore.iterator().next());
        if (allocated != null) {
            return allocated;
        }

        // Allocation failed. Someone else may have inserted an overlapping segment between our first lookup and
        // now, so look again: only a changed segment set justifies another attempt.
        final Set<DataSegment> segmentsAfter = new HashSet<>(
            coordinator.retrieveUsedSegmentsForInterval(dataSource, rowInterval, Segments.ONLY_VISIBLE)
        );
        if (segmentsAfter.equals(segmentsBefore)) {
            return null;
        }
        if (attempt >= MAX_ATTEMPTS) {
            log.error("Used segment set changed for rowInterval[%s]. Not trying again (attempt = %,d).", rowInterval, attempt);
            return null;
        }

        final long shortRandomSleep = 50 + (long) (ThreadLocalRandom.current().nextDouble() * 450);
        log.debug("Used segment set changed for rowInterval[%s]. Retrying segment allocation in %,dms (attempt = %,d).", rowInterval, shortRandomSleep, attempt);
        try {
            Thread.sleep(shortRandomSleep);
        } catch (InterruptedException e) {
            // Restore the interrupt flag before surfacing the interruption to the caller.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }
}

Also used : IndexerMetadataStorageCoordinator(org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator) IAE(org.apache.druid.java.util.common.IAE) DataSegment(org.apache.druid.timeline.DataSegment) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) Interval(org.joda.time.Interval) HashSet(java.util.HashSet)

Example 13 with SegmentIdWithShardSpec

use of org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec in project druid by druid-io.

the class TaskLockbox method tryLock.

/**
 * Attempt to acquire a lock for a task, without removing it from the queue. Can safely be called multiple times on
 * the same task until the lock is preempted.
 *
 * <p>The whole operation runs while holding {@code giant}. For a {@link LockRequestForNewSegment} a segment id is
 * allocated as part of the call: before the lock posse is looked up when the granularity is
 * {@code LockGranularity.SEGMENT}, and after it (using the lock's version) when the granularity is
 * {@code LockGranularity.TIME_CHUNK}. If the task is newly added to the posse, the lock is written to
 * {@code taskStorage}; when that write throws, an alert is emitted, the lock is released again and the call fails.
 *
 * @param task    task asking for the lock; must be in {@code activeTasks}
 * @param request lock request; its interval must have a positive duration
 * @return {@link LockResult#ok} with the new or existing lock (and the allocated segment id, if any) on success;
 * {@link LockResult#revoked} when the matching lock is revoked; {@link LockResult#fail} when no segment id
 * could be allocated for a segment-granularity request, no lock posse is available, or persisting the lock failed
 *
 * @throws IllegalStateException if the task is not a valid active task
 */
public LockResult tryLock(final Task task, final LockRequest request) {
    giant.lock();
    try {
        if (!activeTasks.contains(task.getId())) {
            throw new ISE("Unable to grant lock to inactive Task [%s]", task.getId());
        }
        Preconditions.checkArgument(request.getInterval().toDurationMillis() > 0, "interval empty");

        // Non-null only when the caller also wants a new segment id allocated.
        final LockRequestForNewSegment newSegmentRequest =
            request instanceof LockRequestForNewSegment ? (LockRequestForNewSegment) request : null;

        SegmentIdWithShardSpec allocatedId = null;
        final LockRequest effectiveRequest;
        if (newSegmentRequest == null) {
            effectiveRequest = request;
        } else if (newSegmentRequest.getGranularity() == LockGranularity.SEGMENT) {
            // Segment locks are tied to a specific segment, so the id has to exist before the lock is requested.
            allocatedId = allocateSegmentId(newSegmentRequest, request.getVersion());
            if (allocatedId == null) {
                return LockResult.fail();
            }
            effectiveRequest = new SpecificSegmentLockRequest(newSegmentRequest, allocatedId);
        } else {
            effectiveRequest = new TimeChunkLockRequest(newSegmentRequest);
        }

        final TaskLockPosse posse = createOrFindLockPosse(effectiveRequest);
        if (posse == null) {
            return LockResult.fail();
        }
        if (posse.getTaskLock().isRevoked()) {
            return LockResult.revoked(posse.getTaskLock());
        }

        if (newSegmentRequest != null && newSegmentRequest.getGranularity() == LockGranularity.TIME_CHUNK) {
            if (allocatedId != null) {
                throw new ISE("SegmentId must be allocated after getting a timeChunk lock," + " but we already have [%s] before getting the lock?", allocatedId);
            }
            // With a time chunk lock the segment id is allocated using the version of the lock just obtained.
            allocatedId = allocateSegmentId(newSegmentRequest, posse.getTaskLock().getVersion());
        }

        // Add to existing TaskLockPosse, if necessary
        if (!posse.addTask(task)) {
            log.info("Task[%s] already present in TaskLock[%s]", task.getId(), posse.getTaskLock().getGroupId());
            return LockResult.ok(posse.getTaskLock(), allocatedId);
        }

        log.info("Added task[%s] to TaskLock[%s]", task.getId(), posse.getTaskLock());
        // Update task storage facility. If it fails, revoke the lock.
        try {
            taskStorage.addLock(task.getId(), posse.getTaskLock());
            return LockResult.ok(posse.getTaskLock(), allocatedId);
        } catch (Exception e) {
            log.makeAlert("Failed to persist lock in storage")
               .addData("task", task.getId())
               .addData("dataSource", posse.getTaskLock().getDataSource())
               .addData("interval", posse.getTaskLock().getInterval())
               .addData("version", posse.getTaskLock().getVersion())
               .emit();
            unlock(
                task,
                effectiveRequest.getInterval(),
                posse.getTaskLock().getGranularity() == LockGranularity.SEGMENT ? ((SegmentLock) posse.taskLock).getPartitionId() : null
            );
            return LockResult.fail();
        }
    } finally {
        giant.unlock();
    }
}

Also used : ISE(org.apache.druid.java.util.common.ISE) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec)

Example 14 with SegmentIdWithShardSpec

use of org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec in project druid by druid-io.

the class ParallelIndexSupervisorTask method allocateSegment.

// Internal APIs
/**
 * Allocate a new {@link SegmentIdWithShardSpec} for a request from {@link SinglePhaseSubTask}.
 * The returned segmentIdentifiers have different {@code partitionNum} (thereby different {@link NumberedShardSpec})
 * per bucket interval.
 *
 * <p>The request body is interpreted according to the
 * {@code CTX_USE_LINEAGE_BASED_SEGMENT_ALLOCATION_KEY} context value: as a {@code SegmentAllocationRequest} when it
 * is true, otherwise as a bare {@code DateTime} timestamp.
 *
 * @param param request payload, converted with the toolbox's JSON mapper
 * @param req   servlet request, used for the authorization check
 * @return 200 with the serialized segment identifier on success; 503 while the toolbox is not yet available;
 * 400 on {@code MaxAllowedLocksExceededException} (the current runner is also asked to stop gracefully) or
 * {@code IllegalArgumentException}; 500 on {@code IOException} or {@code IllegalStateException} raised during
 * allocation
 */
@POST
@Path("/segment/allocate")
@Produces(SmileMediaTypes.APPLICATION_JACKSON_SMILE)
@Consumes(SmileMediaTypes.APPLICATION_JACKSON_SMILE)
public Response allocateSegment(Object param, @Context final HttpServletRequest req) {
    ChatHandlers.authorizationCheck(req, Action.READ, getDataSource(), authorizerMapper);

    if (toolbox == null) {
        return Response.status(Response.Status.SERVICE_UNAVAILABLE).entity("task is not running yet").build();
    }

    final ParallelIndexTaskRunner currentRunner = Preconditions.checkNotNull(getCurrentRunner(), "runner");
    if (!(currentRunner instanceof SinglePhaseParallelIndexTaskRunner)) {
        throw new ISE("Expected [%s], but [%s] is in use", SinglePhaseParallelIndexTaskRunner.class.getName(), currentRunner.getClass().getName());
    }
    final SinglePhaseParallelIndexTaskRunner singlePhaseRunner = (SinglePhaseParallelIndexTaskRunner) currentRunner;

    // This context is set in the constructor of ParallelIndexSupervisorTask if it's not set by others.
    final boolean useLineageBasedSegmentAllocation = Preconditions.checkNotNull(getContextValue(SinglePhaseParallelIndexTaskRunner.CTX_USE_LINEAGE_BASED_SEGMENT_ALLOCATION_KEY), "useLineageBasedSegmentAllocation in taskContext");

    try {
        final SegmentIdWithShardSpec allocated;
        if (!useLineageBasedSegmentAllocation) {
            final DateTime timestamp = toolbox.getJsonMapper().convertValue(param, DateTime.class);
            allocated = singlePhaseRunner.allocateNewSegment(getDataSource(), timestamp);
        } else {
            final SegmentAllocationRequest allocationRequest = toolbox.getJsonMapper().convertValue(param, SegmentAllocationRequest.class);
            allocated = singlePhaseRunner.allocateNewSegment(
                getDataSource(),
                allocationRequest.getTimestamp(),
                allocationRequest.getSequenceName(),
                allocationRequest.getPrevSegmentId()
            );
        }
        final byte[] payload = toolbox.getJsonMapper().writeValueAsBytes(allocated);
        return Response.ok(payload).build();
    } catch (MaxAllowedLocksExceededException malee) {
        // Handled first so it is not absorbed by the broader IllegalStateException clause below.
        getCurrentRunner().stopGracefully(malee.getMessage());
        return Response.status(Response.Status.BAD_REQUEST).entity(malee.getMessage()).build();
    } catch (IllegalArgumentException e) {
        return Response.status(Response.Status.BAD_REQUEST).entity(Throwables.getStackTraceAsString(e)).build();
    } catch (IOException | IllegalStateException e) {
        return Response.serverError().entity(Throwables.getStackTraceAsString(e)).build();
    }
}

Also used : MaxAllowedLocksExceededException(org.apache.druid.indexing.common.task.batch.MaxAllowedLocksExceededException) ISE(org.apache.druid.java.util.common.ISE) IOException(java.io.IOException) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) DateTime(org.joda.time.DateTime) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST) Produces(javax.ws.rs.Produces) Consumes(javax.ws.rs.Consumes)

Example 15 with SegmentIdWithShardSpec

use of org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec in project druid by druid-io.

the class HashPartitionCachingLocalSegmentAllocatorTest method allocatesCorrectShardSpec.

/**
 * Allocates a segment for a single input row and verifies both the resulting segment id and the fields of its
 * {@link HashBucketShardSpec} (partition dimensions, bucket count and bucket id) against the test constants.
 */
@Test
public void allocatesCorrectShardSpec() throws IOException {
    final InputRow inputRow = createInputRow();
    final SegmentIdWithShardSpec allocated = target.allocate(
        inputRow,
        sequenceNameFunction.getSequenceName(INTERVAL, inputRow),
        null,
        false
    );

    final SegmentId expectedId = SegmentId.of(DATASOURCE, INTERVAL, VERSION, PARTITION_NUM);
    Assert.assertEquals(expectedId, allocated.asSegmentId());

    // The allocator under test is expected to hand out hash-bucket shard specs; the cast fails the test otherwise.
    final HashBucketShardSpec bucketSpec = (HashBucketShardSpec) allocated.getShardSpec();
    Assert.assertEquals(PARTITION_DIMENSIONS, bucketSpec.getPartitionDimensions());
    Assert.assertEquals(NUM_PARTITONS, bucketSpec.getNumBuckets());
    Assert.assertEquals(PARTITION_NUM, bucketSpec.getBucketId());
}

Also used : HashBucketShardSpec(org.apache.druid.timeline.partition.HashBucketShardSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) Test(org.junit.Test)

Aggregations

- SegmentIdWithShardSpec (org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec): 36
- Test (org.junit.Test): 23
- DataSegment (org.apache.druid.timeline.DataSegment): 14
- Interval (org.joda.time.Interval): 14
- NoopTask (org.apache.druid.indexing.common.task.NoopTask): 12
- Task (org.apache.druid.indexing.common.task.Task): 12
- PartialShardSpec (org.apache.druid.timeline.partition.PartialShardSpec): 11
- HashBasedNumberedPartialShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedPartialShardSpec): 10
- NumberedPartialShardSpec (org.apache.druid.timeline.partition.NumberedPartialShardSpec): 10
- HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec): 9
- LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec): 9
- NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec): 8
- NumberedOverwritePartialShardSpec (org.apache.druid.timeline.partition.NumberedOverwritePartialShardSpec): 7
- IOException (java.io.IOException): 6
- HashSet (java.util.HashSet): 6
- Map (java.util.Map): 6
- DateTime (org.joda.time.DateTime): 6
- ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 5
- Iterables (com.google.common.collect.Iterables): 5
- List (java.util.List): 5