Search in sources :

Example 1 with DataSegmentAndIndexZipFilePath

Use of org.apache.druid.indexer.DataSegmentAndIndexZipFilePath in project druid by druid-io.

The class HadoopIndexTask, method runInternal.

/**
 * Runs the Hadoop indexing task in two reflective phases, each executed with the task's
 * isolated class loader installed as the thread context class loader:
 * <ol>
 *   <li>DETERMINE_PARTITIONS: invokes {@code runTask} on the determine-config runner and
 *       parses the returned JSON into {@code determineConfigStatus}; the resulting schema
 *       is used for the rest of the run.</li>
 *   <li>BUILD_SEGMENTS: invokes {@code runTask} on the index-generator runner, renames the
 *       produced index files, publishes the segments and optionally waits for them to
 *       become available.</li>
 * </ol>
 * Between the phases the segment version is resolved from a task lock (acquired here when
 * the spec declares no input intervals, otherwise taken from the single existing lock) and
 * may be overridden by an explicit, strictly smaller spec version.
 *
 * <p>Every failure path that returns a {@link TaskStatus} records the message in
 * {@code errorMsg} and writes the task completion reports before returning.
 *
 * @param toolbox task toolbox supplying the JSON mapper, config, segment pusher, task
 *                action client and report writer used below
 * @return success once segments are published (and, if configured, available); failure if
 *         either phase reports an error or the explicit spec version is not smaller than
 *         the lock version
 * @throws Exception on errors outside the two reflective phases; exceptions raised inside
 *                   either phase are rethrown wrapped in a {@link RuntimeException}
 */
@SuppressWarnings("unchecked")
private TaskStatus runInternal(TaskToolbox toolbox) throws Exception {
    // Flags consumed by the cleanup job in the outer finally block.
    boolean indexGeneratorJobAttempted = false;
    boolean indexGeneratorJobSuccess = false;
    HadoopIngestionSpec indexerSchema = null;
    try {
        // Registers a closer that calls killHadoopJob(); presumably run on abnormal exit, per the method name.
        registerResourceCloserOnAbnormalExit(config -> killHadoopJob());
        String hadoopJobIdFile = getHadoopJobIdFileName();
        final ClassLoader loader = buildClassLoader(toolbox);
        // No input intervals in the spec means the lock has to be acquired after partitions are determined.
        boolean determineIntervals = spec.getDataSchema().getGranularitySpec().inputIntervals().isEmpty();
        HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(spec, jsonMapper, new OverlordActionBasedUsedSegmentsRetriever(toolbox));

        // ---- Phase 1: determine partitions ----
        Object determinePartitionsInnerProcessingRunner = getForeignClassloaderObject("org.apache.druid.indexing.common.task.HadoopIndexTask$HadoopDetermineConfigInnerProcessingRunner", loader);
        determinePartitionsStatsGetter = new InnerProcessingStatsGetter(determinePartitionsInnerProcessingRunner);
        String[] determinePartitionsInput = new String[] { toolbox.getJsonMapper().writeValueAsString(spec), toolbox.getConfig().getHadoopWorkingPath(), toolbox.getSegmentPusher().getPathForHadoop(), hadoopJobIdFile };
        final ClassLoader oldLoader = Thread.currentThread().getContextClassLoader();
        Class<?> determinePartitionsRunnerClass = determinePartitionsInnerProcessingRunner.getClass();
        Method determinePartitionsInnerProcessingRunTask = determinePartitionsRunnerClass.getMethod("runTask", determinePartitionsInput.getClass());
        try {
            Thread.currentThread().setContextClassLoader(loader);
            ingestionState = IngestionState.DETERMINE_PARTITIONS;
            // The String[] is wrapped in an Object[] so it is passed as one argument rather than expanded as varargs.
            final String determineConfigStatusString = (String) determinePartitionsInnerProcessingRunTask.invoke(determinePartitionsInnerProcessingRunner, new Object[] { determinePartitionsInput });
            determineConfigStatus = toolbox.getJsonMapper().readValue(determineConfigStatusString, HadoopDetermineConfigInnerProcessingStatus.class);
            indexerSchema = determineConfigStatus.getSchema();
            if (indexerSchema == null) {
                // No schema came back: report the runner's error message and fail the task.
                errorMsg = determineConfigStatus.getErrorMsg();
                toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
                return TaskStatus.failure(getId(), errorMsg);
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // Always restore the caller's context class loader.
            Thread.currentThread().setContextClassLoader(oldLoader);
        }

        // ---- Resolve the segment version from a task lock ----
        // We should have a lock from before we started running only if interval was specified
        String version;
        if (determineIntervals) {
            Interval interval = JodaUtils.umbrellaInterval(JodaUtils.condenseIntervals(indexerSchema.getDataSchema().getGranularitySpec().sortedBucketIntervals()));
            final long lockTimeoutMs = getContextValue(Tasks.LOCK_TIMEOUT_KEY, Tasks.DEFAULT_LOCK_TIMEOUT_MILLIS);
            // Note: if lockTimeoutMs is larger than ServerConfig.maxIdleTime, the below line can incur http timeout error.
            final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, interval, lockTimeoutMs)), "Cannot acquire a lock for interval[%s]", interval);
            if (lock.isRevoked()) {
                throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", interval));
            }
            version = lock.getVersion();
        } else {
            // getOnlyElement throws unless exactly one lock is held.
            Iterable<TaskLock> locks = getTaskLocks(toolbox.getTaskActionClient());
            final TaskLock myLock = Iterables.getOnlyElement(locks);
            version = myLock.getVersion();
        }
        final String specVersion = indexerSchema.getTuningConfig().getVersion();
        if (indexerSchema.getTuningConfig().isUseExplicitVersion()) {
            // An explicit version is accepted only when it sorts strictly before the lock version.
            if (specVersion.compareTo(version) < 0) {
                version = specVersion;
            } else {
                String errMsg = StringUtils.format("Spec version can not be greater than or equal to the lock version, Spec version: [%s] Lock version: [%s].", specVersion, version);
                log.error(errMsg);
                // Record the failure and write the completion reports, as the other failure
                // paths in this method do, instead of writing a null report.
                errorMsg = errMsg;
                toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
                return TaskStatus.failure(getId(), errMsg);
            }
        }
        log.info("Setting version to: %s", version);

        // ---- Phase 2: build segments ----
        Object innerProcessingRunner = getForeignClassloaderObject("org.apache.druid.indexing.common.task.HadoopIndexTask$HadoopIndexGeneratorInnerProcessingRunner", loader);
        buildSegmentsStatsGetter = new InnerProcessingStatsGetter(innerProcessingRunner);
        String[] buildSegmentsInput = new String[] { toolbox.getJsonMapper().writeValueAsString(indexerSchema), version, hadoopJobIdFile };
        Class<?> buildSegmentsRunnerClass = innerProcessingRunner.getClass();
        Method innerProcessingRunTask = buildSegmentsRunnerClass.getMethod("runTask", buildSegmentsInput.getClass());
        try {
            Thread.currentThread().setContextClassLoader(loader);
            ingestionState = IngestionState.BUILD_SEGMENTS;
            // Set before invoking so the cleanup job knows the job was attempted even if the invocation throws.
            indexGeneratorJobAttempted = true;
            final String jobStatusString = (String) innerProcessingRunTask.invoke(innerProcessingRunner, new Object[] { buildSegmentsInput });
            buildSegmentsStatus = toolbox.getJsonMapper().readValue(jobStatusString, HadoopIndexGeneratorInnerProcessingStatus.class);
            List<DataSegmentAndIndexZipFilePath> dataSegmentAndIndexZipFilePaths = buildSegmentsStatus.getDataSegmentAndIndexZipFilePaths();
            if (dataSegmentAndIndexZipFilePaths != null) {
                indexGeneratorJobSuccess = true;
                renameSegmentIndexFilesJob(toolbox.getJsonMapper().writeValueAsString(indexerSchema), toolbox.getJsonMapper().writeValueAsString(dataSegmentAndIndexZipFilePaths));
                // Collect straight into an ArrayList; no intermediate list copy is needed.
                ArrayList<DataSegment> segments = dataSegmentAndIndexZipFilePaths.stream().map(DataSegmentAndIndexZipFilePath::getSegment).collect(Collectors.toCollection(ArrayList::new));
                toolbox.publishSegments(segments);
                // for awaitSegmentAvailabilityTimeoutMillis
                if (spec.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis() > 0) {
                    ingestionState = IngestionState.SEGMENT_AVAILABILITY_WAIT;
                    waitForSegmentAvailability(toolbox, segments, spec.getTuningConfig().getAwaitSegmentAvailabilityTimeoutMillis());
                }
                ingestionState = IngestionState.COMPLETED;
                toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
                return TaskStatus.success(getId());
            } else {
                // A null segment list means the index generator job failed.
                errorMsg = buildSegmentsStatus.getErrorMsg();
                toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
                return TaskStatus.failure(getId(), errorMsg);
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            Thread.currentThread().setContextClassLoader(oldLoader);
        }
    } finally {
        // Runs on every exit path; the schema is passed as JSON, or null if phase 1 never produced one.
        indexerGeneratorCleanupJob(indexGeneratorJobAttempted, indexGeneratorJobSuccess, indexerSchema == null ? null : toolbox.getJsonMapper().writeValueAsString(indexerSchema));
    }
}
Also used : HadoopIngestionSpec(org.apache.druid.indexer.HadoopIngestionSpec) ArrayList(java.util.ArrayList) OverlordActionBasedUsedSegmentsRetriever(org.apache.druid.indexing.hadoop.OverlordActionBasedUsedSegmentsRetriever) DataSegment(org.apache.druid.timeline.DataSegment) TaskLock(org.apache.druid.indexing.common.TaskLock) ISE(org.apache.druid.java.util.common.ISE) DataSegmentAndIndexZipFilePath(org.apache.druid.indexer.DataSegmentAndIndexZipFilePath) Method(java.lang.reflect.Method) InvocationTargetException(java.lang.reflect.InvocationTargetException) TimeChunkLockAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction) Interval(org.joda.time.Interval)

Aggregations

InvocationTargetException (java.lang.reflect.InvocationTargetException)1 Method (java.lang.reflect.Method)1 ArrayList (java.util.ArrayList)1 DataSegmentAndIndexZipFilePath (org.apache.druid.indexer.DataSegmentAndIndexZipFilePath)1 HadoopIngestionSpec (org.apache.druid.indexer.HadoopIngestionSpec)1 TaskLock (org.apache.druid.indexing.common.TaskLock)1 TimeChunkLockAcquireAction (org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction)1 OverlordActionBasedUsedSegmentsRetriever (org.apache.druid.indexing.hadoop.OverlordActionBasedUsedSegmentsRetriever)1 ISE (org.apache.druid.java.util.common.ISE)1 DataSegment (org.apache.druid.timeline.DataSegment)1 Interval (org.joda.time.Interval)1