Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
From the class TestResourceUsageEmulators, the method testResourceUsageMatcherRunner.
/**
* Test {@link LoadJob.ResourceUsageMatcherRunner}.
*/
@Test
@SuppressWarnings("unchecked")
public void testResourceUsageMatcherRunner() throws Exception {
    Configuration conf = new Configuration();
    FakeProgressive progress = new FakeProgressive();
    // set the resource calculator plugin
    conf.setClass(TTConfig.TT_RESOURCE_CALCULATOR_PLUGIN,
            DummyResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class);
    // set the resources
    // set the resource implementation class
    conf.setClass(ResourceUsageMatcher.RESOURCE_USAGE_EMULATION_PLUGINS,
            TestResourceUsageEmulatorPlugin.class, ResourceUsageEmulatorPlugin.class);
    long currentTime = System.currentTimeMillis();
    // initialize the matcher class
    TaskAttemptID id = new TaskAttemptID("test", 1, TaskType.MAP, 1, 1);
    StatusReporter reporter = new DummyReporter(progress);
    TaskInputOutputContext context = new MapContextImpl(conf, id, null, null, null, reporter, null);
    FakeResourceUsageMatcherRunner matcher = new FakeResourceUsageMatcherRunner(context, null);
    // check if the matcher initialized the plugin
    String identifier = TestResourceUsageEmulatorPlugin.DEFAULT_IDENTIFIER;
    long initTime = TestResourceUsageEmulatorPlugin.testInitialization(identifier, conf);
    assertTrue("ResourceUsageMatcherRunner failed to initialize the" + " configured plugin", initTime > currentTime);
    // check the progress
    assertEquals("Progress mismatch in ResourceUsageMatcherRunner", 0, progress.getProgress(), 0D);
    // call match() and check progress
    progress.setProgress(0.01f);
    currentTime = System.currentTimeMillis();
    matcher.test();
    long emulateTime = TestResourceUsageEmulatorPlugin.testEmulation(identifier, conf);
    assertTrue("ProgressBasedResourceUsageMatcher failed to load and emulate" + " the configured plugin", emulateTime > currentTime);
}
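For reference, the five-argument constructor used above takes a jobtracker identifier, a job number, the task type, a task number and an attempt number. A minimal sketch of building such an ID and round-tripping it through its canonical string form (the literal values are only for illustration):

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

// Build an attempt ID the same way the test above does.
TaskAttemptID id = new TaskAttemptID("test", 1, TaskType.MAP, 1, 1);
// toString() yields the canonical form, roughly attempt_test_0001_m_000001_1.
String text = id.toString();
// forName() parses the canonical string back into an equal TaskAttemptID.
TaskAttemptID parsed = TaskAttemptID.forName(text);
assert parsed.equals(id);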
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
From the class ZombieJob, the method buildMaps.
/**
* Build task mapping and task attempt mapping, to be later used to find
* information of a particular {@link TaskID} or {@link TaskAttemptID}.
*/
private synchronized void buildMaps() {
    if (loggedTaskMap == null) {
        loggedTaskMap = new HashMap<TaskID, LoggedTask>();
        loggedTaskAttemptMap = new HashMap<TaskAttemptID, LoggedTaskAttempt>();
        for (LoggedTask map : job.getMapTasks()) {
            map = sanitizeLoggedTask(map);
            if (map != null) {
                loggedTaskMap.put(maskTaskID(map.taskID), map);
                for (LoggedTaskAttempt mapAttempt : map.getAttempts()) {
                    mapAttempt = sanitizeLoggedTaskAttempt(mapAttempt);
                    if (mapAttempt != null) {
                        TaskAttemptID id = mapAttempt.getAttemptID();
                        loggedTaskAttemptMap.put(maskAttemptID(id), mapAttempt);
                    }
                }
            }
        }
        for (LoggedTask reduce : job.getReduceTasks()) {
            reduce = sanitizeLoggedTask(reduce);
            if (reduce != null) {
                loggedTaskMap.put(maskTaskID(reduce.taskID), reduce);
                for (LoggedTaskAttempt reduceAttempt : reduce.getAttempts()) {
                    reduceAttempt = sanitizeLoggedTaskAttempt(reduceAttempt);
                    if (reduceAttempt != null) {
                        TaskAttemptID id = reduceAttempt.getAttemptID();
                        loggedTaskAttemptMap.put(maskAttemptID(id), reduceAttempt);
                    }
                }
            }
        }
        // TODO: do not care about "other" tasks, "setup" or "clean"
    }
}
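Both maps are keyed on masked IDs, so any lookup has to mask the incoming ID in the same way. A hypothetical lookup helper (not part of the code shown here) illustrating how the maps built above would be consulted:

// Hypothetical helper: resolve a trace attempt ID against the map built above.
// The incoming ID is masked first so it matches the masked keys.
private LoggedTaskAttempt lookupLoggedAttempt(TaskAttemptID attemptId) {
    buildMaps();
    return loggedTaskAttemptMap.get(maskAttemptID(attemptId));
}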
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.
From the class ZombieJob, the method maskAttemptID.
/**
* Mask the job ID part in a {@link TaskAttemptID}.
*
* @param attemptId
* raw {@link TaskAttemptID} read from trace
* @return masked {@link TaskAttemptID} with empty {@link JobID}.
*/
private TaskAttemptID maskAttemptID(TaskAttemptID attemptId) {
    JobID jobId = new JobID();
    TaskType taskType = attemptId.getTaskType();
    TaskID taskId = attemptId.getTaskID();
    return new TaskAttemptID(jobId.getJtIdentifier(), jobId.getId(), taskType, taskId.getId(), attemptId.getId());
}
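Only the job portion is rewritten: a freshly constructed JobID has an empty jobtracker identifier and id 0, while the task type, task number and attempt number are carried over. An illustrative sketch (the trace ID below is made up):

// Illustrative only: mask an attempt ID read from a trace.
TaskAttemptID traced = TaskAttemptID.forName("attempt_200904211745_0002_m_000003_0");
TaskAttemptID masked = maskAttemptID(traced);
// masked now carries an empty JobID (identifier "", id 0); the task and attempt
// numbers are unchanged, so lookups no longer depend on the job ID in the trace.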
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project presto by prestodb.
From the class ParquetHiveRecordCursor, the method createParquetRecordReader.
private ParquetRecordReader<FakeParquetRecord> createParquetRecordReader(
        HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration,
        Path path, long start, long length, List<HiveColumnHandle> columns,
        boolean useParquetColumnNames, TypeManager typeManager,
        boolean predicatePushdownEnabled, TupleDomain<HiveColumnHandle> effectivePredicate) {
    ParquetDataSource dataSource = null;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
        dataSource = buildHdfsParquetDataSource(fileSystem, path, start, length);
        ParquetMetadata parquetMetadata = hdfsEnvironment.doAs(sessionUser,
                () -> ParquetFileReader.readFooter(configuration, path, NO_FILTER));
        List<BlockMetaData> blocks = parquetMetadata.getBlocks();
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        PrestoReadSupport readSupport = new PrestoReadSupport(useParquetColumnNames, columns, fileSchema);
        List<parquet.schema.Type> fields = columns.stream()
                .filter(column -> column.getColumnType() == REGULAR)
                .map(column -> getParquetType(column, fileSchema, useParquetColumnNames))
                .filter(Objects::nonNull)
                .collect(toList());
        MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
        LongArrayList offsets = new LongArrayList(blocks.size());
        for (BlockMetaData block : blocks) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                if (predicatePushdownEnabled) {
                    ParquetPredicate parquetPredicate = buildParquetPredicate(columns, effectivePredicate, fileMetaData.getSchema(), typeManager);
                    if (predicateMatches(parquetPredicate, block, dataSource, requestedSchema, effectivePredicate)) {
                        offsets.add(block.getStartingPos());
                    }
                } else {
                    offsets.add(block.getStartingPos());
                }
            }
        }
        ParquetInputSplit split = new ParquetInputSplit(path, start, start + length, length, null, offsets.toLongArray());
        TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(configuration, new TaskAttemptID());
        return hdfsEnvironment.doAs(sessionUser, () -> {
            ParquetRecordReader<FakeParquetRecord> realReader = new PrestoParquetRecordReader(readSupport);
            realReader.initialize(split, taskContext);
            return realReader;
        });
    } catch (Exception e) {
        Throwables.propagateIfInstanceOf(e, PrestoException.class);
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
            throw Throwables.propagate(e);
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    } finally {
        if (dataSource != null) {
            try {
                dataSource.close();
            } catch (IOException ignored) {
            }
        }
    }
}
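The TaskAttemptID detail worth noting here is the no-argument constructor: Presto drives the MapReduce-based ParquetRecordReader outside of any real job, so a default attempt ID is enough to satisfy the reader's API. A minimal sketch of the same pattern, with the split and reader wiring elided:

// A default TaskAttemptID carries an empty JobID and zeroed task and attempt
// numbers; that is sufficient when the "task" exists only to satisfy the API.
TaskAttemptContext taskContext =
        ContextUtil.newTaskAttemptContext(configuration, new TaskAttemptID());
// realReader.initialize(split, taskContext) can then be called as above.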
Use of org.apache.hadoop.mapreduce.TaskAttemptID in project jena by apache.
From the class AbstractBlankNodeTests, the method blank_node_identity_02.
/**
* Test that starts with two blank nodes in two different files and checks
* that writing them to a single file does not conflate them
*
* @throws IOException
* @throws InterruptedException
*/
@Test
public void blank_node_identity_02() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());
    // Temporary files
    File a = File.createTempFile("bnode_identity", getInitialInputExtension());
    File b = File.createTempFile("bnode_identity", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_identity", new FileAttribute[0]).toFile();
    try {
        // Prepare the input data
        // Same blank node but in different files so must be treated as
        // different blank nodes and not converge
        List<T> tuples = new ArrayList<>();
        Node bnode = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("first")));
        writeTuples(a, tuples);
        tuples.clear();
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("second")));
        writeTuples(b, tuples);
        // Set up fake job which will process the two files
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()), new Path(b.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());
        // Prepare the output writing - putting all output to a single file
        OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
        TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), createAttemptID(1, 2, 1));
        RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);
        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            // Copy the input to the output
            while (reader.nextKeyValue()) {
                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
            }
        }
        writer.close(outputTaskContext);
        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);
        // Now we need to create a subsequent job that reads the
        // intermediate outputs
        // The Blank nodes should have been given separate identities so we
        // should not be conflating them; this is the opposite problem to
        // that described in JENA-820
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());
        // Expect to end up with two distinct blank nodes
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }
        // Nodes must not have converged
        Assert.assertEquals(2, nodes.size());
    } finally {
        a.delete();
        b.delete();
        deleteDirectory(intermediateOutputDir);
    }
}
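The createAttemptID(...) calls above refer to a helper of the test class that is not shown here. A plausible sketch of such a helper, assuming the arguments are a job number, task number and attempt number (the "outputTest" identifier and the MAP task type are illustrative assumptions):

// Assumed helper: build a synthetic attempt ID for the fake job.
protected TaskAttemptID createAttemptID(int jobId, int taskId, int attemptId) {
    return new TaskAttemptID("outputTest", jobId, TaskType.MAP, taskId, attemptId);
}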