use of org.apache.druid.indexing.common.task.HadoopIndexTask in project druid by druid-io.
the class MaterializedViewSupervisorSpec method createTask.
public HadoopIndexTask createTask(Interval interval, String version, List<DataSegment> segments) {
  String taskId = StringUtils.format("%s_%s_%s", TASK_PREFIX, dataSourceName, DateTimes.nowUtc());
  // generate parser
  Map<String, Object> parseSpec = new HashMap<>();
  parseSpec.put("format", "timeAndDims");
  parseSpec.put("dimensionsSpec", dimensionsSpec);
  Map<String, Object> parser = new HashMap<>();
  parser.put("type", "map");
  parser.put("parseSpec", parseSpec);
  // generate HadoopTuningConfig
  HadoopTuningConfig tuningConfigForTask = new HadoopTuningConfig(
      tuningConfig.getWorkingPath(),
      version,
      tuningConfig.getPartitionsSpec(),
      tuningConfig.getShardSpecs(),
      tuningConfig.getIndexSpec(),
      tuningConfig.getIndexSpecForIntermediatePersists(),
      tuningConfig.getAppendableIndexSpec(),
      tuningConfig.getMaxRowsInMemory(),
      tuningConfig.getMaxBytesInMemory(),
      tuningConfig.isLeaveIntermediate(),
      tuningConfig.isCleanupOnFailure(),
      tuningConfig.isOverwriteFiles(),
      tuningConfig.isIgnoreInvalidRows(),
      tuningConfig.getJobProperties(),
      tuningConfig.isCombineText(),
      tuningConfig.getUseCombiner(),
      tuningConfig.getMaxRowsInMemory(),
      tuningConfig.getNumBackgroundPersistThreads(),
      tuningConfig.isForceExtendableShardSpecs(),
      true,
      tuningConfig.getUserAllowedHadoopPrefix(),
      tuningConfig.isLogParseExceptions(),
      tuningConfig.getMaxParseExceptions(),
      tuningConfig.isUseYarnRMJobStatusFallback(),
      tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis()
  );
  // generate granularity
  ArbitraryGranularitySpec granularitySpec = new ArbitraryGranularitySpec(Granularities.NONE, ImmutableList.of(interval));
  // generate DataSchema
  DataSchema dataSchema = new DataSchema(dataSourceName, parser, aggregators, granularitySpec, TransformSpec.NONE, objectMapper);
  // generate DatasourceIngestionSpec
  DatasourceIngestionSpec datasourceIngestionSpec = new DatasourceIngestionSpec(baseDataSource, null, ImmutableList.of(interval), segments, null, null, null, false, null);
  // generate HadoopIOConfig
  Map<String, Object> inputSpec = new HashMap<>();
  inputSpec.put("type", "dataSource");
  inputSpec.put("ingestionSpec", datasourceIngestionSpec);
  HadoopIOConfig hadoopIOConfig = new HadoopIOConfig(inputSpec, null, null);
  // generate HadoopIngestionSpec
  HadoopIngestionSpec spec = new HadoopIngestionSpec(dataSchema, hadoopIOConfig, tuningConfigForTask);
  // generate HadoopIndexTask
  HadoopIndexTask task = new HadoopIndexTask(taskId, spec, hadoopCoordinates, hadoopDependencyCoordinates, classpathPrefix, objectMapper, context, authorizerMapper, chatHandlerProvider);
  return task;
}
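The parser is handed to the DataSchema as a plain map; the concrete parser is only materialized later via the injected ObjectMapper. A minimal sketch of inspecting the JSON shape of such a map with Jackson; the class name and the dimensionsSpec value below are illustrative, not fields of the supervisor spec.

import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ParserMapJsonSketch {
  public static void main(String[] args) throws Exception {
    Map<String, Object> parseSpec = new HashMap<>();
    parseSpec.put("format", "timeAndDims");
    // hypothetical dimensionsSpec value; createTask uses the supervisor's configured dimensionsSpec
    parseSpec.put("dimensionsSpec", Map.of("dimensions", List.of("dim1", "dim2")));
    Map<String, Object> parser = new HashMap<>();
    parser.put("type", "map");
    parser.put("parseSpec", parseSpec);
    // prints e.g. {"type":"map","parseSpec":{"format":"timeAndDims","dimensionsSpec":{"dimensions":["dim1","dim2"]}}}
    System.out.println(new ObjectMapper().writeValueAsString(parser));
  }
}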
use of org.apache.druid.indexing.common.task.HadoopIndexTask in project druid by druid-io.
the class MaterializedViewSupervisorTest method testCreateTask.
/**
* Verifies that creating a HadoopIndexTask completes without raising an exception.
*/
@Test
public void testCreateTask() {
  List<DataSegment> baseSegments = Collections.singletonList(
      new DataSegment("base", Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "2015-01-03", ImmutableMap.of(),
          ImmutableList.of("dim1", "dim2"), ImmutableList.of("m1"),
          new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null), 9, 1024)
  );
  HadoopIndexTask task = spec.createTask(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "2015-01-03", baseSegments);
  Assert.assertNotNull(task);
}
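Beyond the null check, a test could also assert on properties the created task inherits from the spec. A hedged sketch follows; the expected type string and datasource name are assumptions about the test fixture, not values taken from the listing above.

// illustrative extra assertions with hypothetical expected values
Assert.assertEquals("index_hadoop", task.getType());
// the derived datasource name comes from the supervisor spec; "derived" is assumed here
Assert.assertEquals("derived", task.getDataSource());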
use of org.apache.druid.indexing.common.task.HadoopIndexTask in project druid by druid-io.
the class MaterializedViewSupervisor method checkSegmentsAndSubmitTasks.
/**
* Find the intervals in which the derived dataSource should rebuild its segments.
* Choose the latest intervals, create a new HadoopIndexTask for each, and submit them.
*/
@VisibleForTesting
void checkSegmentsAndSubmitTasks() {
  synchronized (taskLock) {
    List<Interval> intervalsToRemove = new ArrayList<>();
    for (Map.Entry<Interval, HadoopIndexTask> entry : runningTasks.entrySet()) {
      Optional<TaskStatus> taskStatus = taskStorage.getStatus(entry.getValue().getId());
      if (!taskStatus.isPresent() || !taskStatus.get().isRunnable()) {
        intervalsToRemove.add(entry.getKey());
      }
    }
    for (Interval interval : intervalsToRemove) {
      runningTasks.remove(interval);
      runningVersion.remove(interval);
    }
    if (runningTasks.size() == maxTaskCount) {
      // if the number of running tasks reaches the max task count, the supervisor won't submit new tasks.
      return;
    }
    Pair<SortedMap<Interval, String>, Map<Interval, List<DataSegment>>> toBuildIntervalAndBaseSegments = checkSegments();
    SortedMap<Interval, String> sortedToBuildVersion = toBuildIntervalAndBaseSegments.lhs;
    Map<Interval, List<DataSegment>> baseSegments = toBuildIntervalAndBaseSegments.rhs;
    missInterval = sortedToBuildVersion.keySet();
    submitTasks(sortedToBuildVersion, baseSegments);
  }
}
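Inside the supervisor this check runs on a periodic schedule rather than being called directly. A minimal sketch of such a polling loop using java.util.concurrent; the method name, supervisor reference, and 60-second cadence are illustrative, not the project's actual scheduling code.

// periodic driver sketch (illustrative only)
private void startCheckLoop(MaterializedViewSupervisor supervisor) {
  ScheduledExecutorService exec = Executors.newSingleThreadScheduledExecutor();
  exec.scheduleWithFixedDelay(
      () -> {
        try {
          supervisor.checkSegmentsAndSubmitTasks();
        } catch (Exception e) {
          // swallow and retry on the next tick; a real supervisor would log and emit an alert here
        }
      },
      0,
      60,
      TimeUnit.SECONDS
  );
}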
use of org.apache.druid.indexing.common.task.HadoopIndexTask in project druid by druid-io.
the class MaterializedViewSupervisor method clearTasks.
private void clearTasks() {
  for (HadoopIndexTask task : runningTasks.values()) {
    if (taskMaster.getTaskQueue().isPresent()) {
      taskMaster.getTaskQueue().get().shutdown(task.getId(), "killing all tasks");
    }
  }
  runningTasks.clear();
  runningVersion.clear();
}
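Note that taskMaster.getTaskQueue() is re-evaluated for every running task above. An equivalent sketch that resolves the Optional once before iterating; the method name is made up for illustration and this is not the project's code.

private void clearTasksOnce() {
  Optional<TaskQueue> taskQueue = taskMaster.getTaskQueue();
  if (taskQueue.isPresent()) {
    for (HadoopIndexTask task : runningTasks.values()) {
      taskQueue.get().shutdown(task.getId(), "killing all tasks");
    }
  }
  runningTasks.clear();
  runningVersion.clear();
}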
use of org.apache.druid.indexing.common.task.HadoopIndexTask in project druid by druid-io.
the class MaterializedViewSupervisor method submitTasks.
private void submitTasks(SortedMap<Interval, String> sortedToBuildVersion, Map<Interval, List<DataSegment>> baseSegments) {
  for (Map.Entry<Interval, String> entry : sortedToBuildVersion.entrySet()) {
    if (runningTasks.size() < maxTaskCount) {
      HadoopIndexTask task = spec.createTask(entry.getKey(), entry.getValue(), baseSegments.get(entry.getKey()));
      try {
        if (taskMaster.getTaskQueue().isPresent()) {
          taskMaster.getTaskQueue().get().add(task);
          runningVersion.put(entry.getKey(), entry.getValue());
          runningTasks.put(entry.getKey(), task);
        }
      } catch (EntryExistsException e) {
        log.error("task %s already exists", task);
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    }
  }
}
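The sorted map handed to submitTasks comes from checkSegments(). A hedged illustration of what such an input might look like, with made-up intervals and a made-up version string, ordered by interval start:

// hypothetical input: two intervals to rebuild, sharing one version string
SortedMap<Interval, String> toBuildVersion = new TreeMap<>(Comparator.comparing(Interval::getStart));
toBuildVersion.put(Intervals.of("2015-01-02/2015-01-03"), "2021-06-01T00:00:00.000Z");
toBuildVersion.put(Intervals.of("2015-01-03/2015-01-04"), "2021-06-01T00:00:00.000Z");
// with maxTaskCount = 1, only the first entry gets a task in this pass;
// the remaining interval is picked up by a later checkSegmentsAndSubmitTasks() run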