Search in sources :

Example 71 with TaskAttemptID

use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

the class TestResourceUsageEmulators method testResourceUsageMatcherRunner.

/**
   * Test {@link LoadJob.ResourceUsageMatcherRunner}.
   */
@Test
@SuppressWarnings("unchecked")
public void testResourceUsageMatcherRunner() throws Exception {
    Configuration conf = new Configuration();
    FakeProgressive progress = new FakeProgressive();
    // set the resource calculator plugin
    conf.setClass(TTConfig.TT_RESOURCE_CALCULATOR_PLUGIN, DummyResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class);
    // set the resources
    // set the resource implementation class
    conf.setClass(ResourceUsageMatcher.RESOURCE_USAGE_EMULATION_PLUGINS, TestResourceUsageEmulatorPlugin.class, ResourceUsageEmulatorPlugin.class);
    long currentTime = System.currentTimeMillis();
    // initialize the matcher class
    TaskAttemptID id = new TaskAttemptID("test", 1, TaskType.MAP, 1, 1);
    StatusReporter reporter = new DummyReporter(progress);
    TaskInputOutputContext context = new MapContextImpl(conf, id, null, null, null, reporter, null);
    FakeResourceUsageMatcherRunner matcher = new FakeResourceUsageMatcherRunner(context, null);
    // check if the matcher initialized the plugin
    String identifier = TestResourceUsageEmulatorPlugin.DEFAULT_IDENTIFIER;
    long initTime = TestResourceUsageEmulatorPlugin.testInitialization(identifier, conf);
    assertTrue("ResourceUsageMatcherRunner failed to initialize the" + " configured plugin", initTime > currentTime);
    // check the progress
    assertEquals("Progress mismatch in ResourceUsageMatcherRunner", 0, progress.getProgress(), 0D);
    // call match() and check progress
    progress.setProgress(0.01f);
    currentTime = System.currentTimeMillis();
    matcher.test();
    long emulateTime = TestResourceUsageEmulatorPlugin.testEmulation(identifier, conf);
    assertTrue("ProgressBasedResourceUsageMatcher failed to load and emulate" + " the configured plugin", emulateTime > currentTime);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) MapContextImpl(org.apache.hadoop.mapreduce.task.MapContextImpl) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) TaskInputOutputContext(org.apache.hadoop.mapreduce.TaskInputOutputContext) StatusReporter(org.apache.hadoop.mapreduce.StatusReporter) Test(org.junit.Test)

Example 72 with TaskAttemptID

use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

the class ZombieJob method buildMaps.

/**
   * Build task mapping and task attempt mapping, to be later used to find
   * information of a particular {@link TaskID} or {@link TaskAttemptID}.
   */
private synchronized void buildMaps() {
    if (loggedTaskMap == null) {
        loggedTaskMap = new HashMap<TaskID, LoggedTask>();
        loggedTaskAttemptMap = new HashMap<TaskAttemptID, LoggedTaskAttempt>();
        for (LoggedTask map : job.getMapTasks()) {
            map = sanitizeLoggedTask(map);
            if (map != null) {
                loggedTaskMap.put(maskTaskID(map.taskID), map);
                for (LoggedTaskAttempt mapAttempt : map.getAttempts()) {
                    mapAttempt = sanitizeLoggedTaskAttempt(mapAttempt);
                    if (mapAttempt != null) {
                        TaskAttemptID id = mapAttempt.getAttemptID();
                        loggedTaskAttemptMap.put(maskAttemptID(id), mapAttempt);
                    }
                }
            }
        }
        for (LoggedTask reduce : job.getReduceTasks()) {
            reduce = sanitizeLoggedTask(reduce);
            if (reduce != null) {
                loggedTaskMap.put(maskTaskID(reduce.taskID), reduce);
                for (LoggedTaskAttempt reduceAttempt : reduce.getAttempts()) {
                    reduceAttempt = sanitizeLoggedTaskAttempt(reduceAttempt);
                    if (reduceAttempt != null) {
                        TaskAttemptID id = reduceAttempt.getAttemptID();
                        loggedTaskAttemptMap.put(maskAttemptID(id), reduceAttempt);
                    }
                }
            }
        }
    // TODO: do not care about "other" tasks, "setup" or "clean"
    }
}
Also used : TaskID(org.apache.hadoop.mapreduce.TaskID) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID)

Example 73 with TaskAttemptID

use of org.apache.hadoop.mapreduce.TaskAttemptID in project hadoop by apache.

the class ZombieJob method maskAttemptID.

/**
   * Mask the job ID part in a {@link TaskAttemptID}.
   * 
   * @param attemptId
   *          raw {@link TaskAttemptID} read from trace
   * @return masked {@link TaskAttemptID} with empty {@link JobID}.
   */
private TaskAttemptID maskAttemptID(TaskAttemptID attemptId) {
    JobID jobId = new JobID();
    TaskType taskType = attemptId.getTaskType();
    TaskID taskId = attemptId.getTaskID();
    return new TaskAttemptID(jobId.getJtIdentifier(), jobId.getId(), taskType, taskId.getId(), attemptId.getId());
}
Also used : TaskID(org.apache.hadoop.mapreduce.TaskID) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) TaskType(org.apache.hadoop.mapreduce.TaskType) JobID(org.apache.hadoop.mapreduce.JobID)

Example 74 with TaskAttemptID

use of org.apache.hadoop.mapreduce.TaskAttemptID in project presto by prestodb.

the class ParquetHiveRecordCursor method createParquetRecordReader.

private ParquetRecordReader<FakeParquetRecord> createParquetRecordReader(HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration, Path path, long start, long length, List<HiveColumnHandle> columns, boolean useParquetColumnNames, TypeManager typeManager, boolean predicatePushdownEnabled, TupleDomain<HiveColumnHandle> effectivePredicate) {
    ParquetDataSource dataSource = null;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
        dataSource = buildHdfsParquetDataSource(fileSystem, path, start, length);
        ParquetMetadata parquetMetadata = hdfsEnvironment.doAs(sessionUser, () -> ParquetFileReader.readFooter(configuration, path, NO_FILTER));
        List<BlockMetaData> blocks = parquetMetadata.getBlocks();
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        PrestoReadSupport readSupport = new PrestoReadSupport(useParquetColumnNames, columns, fileSchema);
        List<parquet.schema.Type> fields = columns.stream().filter(column -> column.getColumnType() == REGULAR).map(column -> getParquetType(column, fileSchema, useParquetColumnNames)).filter(Objects::nonNull).collect(toList());
        MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
        LongArrayList offsets = new LongArrayList(blocks.size());
        for (BlockMetaData block : blocks) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                if (predicatePushdownEnabled) {
                    ParquetPredicate parquetPredicate = buildParquetPredicate(columns, effectivePredicate, fileMetaData.getSchema(), typeManager);
                    if (predicateMatches(parquetPredicate, block, dataSource, requestedSchema, effectivePredicate)) {
                        offsets.add(block.getStartingPos());
                    }
                } else {
                    offsets.add(block.getStartingPos());
                }
            }
        }
        ParquetInputSplit split = new ParquetInputSplit(path, start, start + length, length, null, offsets.toLongArray());
        TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(configuration, new TaskAttemptID());
        return hdfsEnvironment.doAs(sessionUser, () -> {
            ParquetRecordReader<FakeParquetRecord> realReader = new PrestoParquetRecordReader(readSupport);
            realReader.initialize(split, taskContext);
            return realReader;
        });
    } catch (Exception e) {
        Throwables.propagateIfInstanceOf(e, PrestoException.class);
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
            throw Throwables.propagate(e);
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    } finally {
        if (dataSource != null) {
            try {
                dataSource.close();
            } catch (IOException ignored) {
            }
        }
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) Arrays(java.util.Arrays) Block(com.facebook.presto.spi.block.Block) TypeManager(com.facebook.presto.spi.type.TypeManager) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_CURSOR_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_CURSOR_ERROR) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) Slices.wrappedBuffer(io.airlift.slice.Slices.wrappedBuffer) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) DecimalType(com.facebook.presto.spi.type.DecimalType) DecimalMetadata(parquet.schema.DecimalMetadata) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) BigInteger(java.math.BigInteger) PrimitiveType(parquet.schema.PrimitiveType) MAP_KEY_VALUE(parquet.schema.OriginalType.MAP_KEY_VALUE) Decimals(com.facebook.presto.spi.type.Decimals) ReadSupport(parquet.hadoop.api.ReadSupport) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) BlockBuilder(com.facebook.presto.spi.block.BlockBuilder) Math.min(java.lang.Math.min) Chars.trimSpacesAndTruncateToLength(com.facebook.presto.spi.type.Chars.trimSpacesAndTruncateToLength) Binary(parquet.io.api.Binary) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Objects(java.util.Objects) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) ROW(com.facebook.presto.spi.type.StandardTypes.ROW) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) ParquetPredicateUtils.buildParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.buildParquetPredicate) DecimalType.createDecimalType(com.facebook.presto.spi.type.DecimalType.createDecimalType) NO_FILTER(parquet.format.converter.ParquetMetadataConverter.NO_FILTER) Optional(java.util.Optional) Math.max(java.lang.Math.max) Varchars.truncateToLength(com.facebook.presto.spi.type.Varchars.truncateToLength) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) RecordMaterializer(parquet.io.api.RecordMaterializer) Converter(parquet.io.api.Converter) Varchars.isVarcharType(com.facebook.presto.spi.type.Varchars.isVarcharType) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) GroupConverter(parquet.io.api.GroupConverter) ParquetTypeUtils.getParquetType(com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType) Slice(io.airlift.slice.Slice) ParquetFileReader(parquet.hadoop.ParquetFileReader) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) ParquetRecordReader(parquet.hadoop.ParquetRecordReader) PrestoException(com.facebook.presto.spi.PrestoException) PrimitiveConverter(parquet.io.api.PrimitiveConverter) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) MAP(com.facebook.presto.spi.type.StandardTypes.MAP) ParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicate) DecimalUtils(com.facebook.presto.hive.util.DecimalUtils) ARRAY(com.facebook.presto.spi.type.StandardTypes.ARRAY) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) ImmutableList(com.google.common.collect.ImmutableList) HiveUtil.closeWithSuppression(com.facebook.presto.hive.HiveUtil.closeWithSuppression) Type(com.facebook.presto.spi.type.Type) ParquetMetadata(parquet.hadoop.metadata.ParquetMetadata) Objects.requireNonNull(java.util.Objects.requireNonNull) DECIMAL(parquet.schema.OriginalType.DECIMAL) BlockBuilderStatus(com.facebook.presto.spi.block.BlockBuilderStatus) Dictionary(parquet.column.Dictionary) TIMESTAMP(com.facebook.presto.spi.type.TimestampType.TIMESTAMP) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) MessageType(parquet.schema.MessageType) Properties(java.util.Properties) ParquetPredicateUtils.predicateMatches(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.predicateMatches) HiveUtil.getDecimalType(com.facebook.presto.hive.HiveUtil.getDecimalType) ContextUtil(parquet.hadoop.util.ContextUtil) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) FileMetaData(parquet.hadoop.metadata.FileMetaData) BlockMetaData(parquet.hadoop.metadata.BlockMetaData) Collectors.toList(java.util.stream.Collectors.toList) GroupType(parquet.schema.GroupType) Chars.isCharType(com.facebook.presto.spi.type.Chars.isCharType) ParquetInputSplit(parquet.hadoop.ParquetInputSplit) BlockMetaData(parquet.hadoop.metadata.BlockMetaData) ParquetMetadata(parquet.hadoop.metadata.ParquetMetadata) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) PrestoException(com.facebook.presto.spi.PrestoException) FileSystem(org.apache.hadoop.fs.FileSystem) FileMetaData(parquet.hadoop.metadata.FileMetaData) MessageType(parquet.schema.MessageType) ParquetPredicateUtils.buildParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.buildParquetPredicate) ParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicate) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) DecimalType(com.facebook.presto.spi.type.DecimalType) PrimitiveType(parquet.schema.PrimitiveType) DecimalType.createDecimalType(com.facebook.presto.spi.type.DecimalType.createDecimalType) Varchars.isVarcharType(com.facebook.presto.spi.type.Varchars.isVarcharType) ParquetTypeUtils.getParquetType(com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType) Type(com.facebook.presto.spi.type.Type) MessageType(parquet.schema.MessageType) HiveUtil.getDecimalType(com.facebook.presto.hive.HiveUtil.getDecimalType) GroupType(parquet.schema.GroupType) Chars.isCharType(com.facebook.presto.spi.type.Chars.isCharType) ParquetInputSplit(parquet.hadoop.ParquetInputSplit)

Example 75 with TaskAttemptID

use of org.apache.hadoop.mapreduce.TaskAttemptID in project jena by apache.

the class AbstractBlankNodeTests method blank_node_identity_02.

/**
     * Test that starts with two blank nodes in two different files and checks
     * that writing them to a single file does not conflate them
     * 
     * @throws IOException
     * @throws InterruptedException
     */
@Test
public void blank_node_identity_02() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());
    // Temporary files
    File a = File.createTempFile("bnode_identity", getInitialInputExtension());
    File b = File.createTempFile("bnode_identity", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_identity", new FileAttribute[0]).toFile();
    try {
        // Prepare the input data
        // Same blank node but in different files so must be treated as
        // different blank nodes and not converge
        List<T> tuples = new ArrayList<>();
        Node bnode = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("first")));
        writeTuples(a, tuples);
        tuples.clear();
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("second")));
        writeTuples(b, tuples);
        // Set up fake job which will process the two files
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()), new Path(b.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());
        // Prepare the output writing - putting all output to a single file
        OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
        TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), createAttemptID(1, 2, 1));
        RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);
        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            // output
            while (reader.nextKeyValue()) {
                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
            }
        }
        writer.close(outputTaskContext);
        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);
        // Now we need to create a subsequent job that reads the
        // intermediate outputs
        // The Blank nodes should have been given separate identities so we
        // should not be conflating them, this is the opposite problem to
        // that described in JENA-820
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());
        // Expect to end up with a single blank node
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }
        // Nodes must not diverge
        Assert.assertEquals(2, nodes.size());
    } finally {
        a.delete();
        b.delete();
        deleteDirectory(intermediateOutputDir);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) Node(org.apache.jena.graph.Node) ArrayList(java.util.ArrayList) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) LongWritable(org.apache.hadoop.io.LongWritable) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job) File(java.io.File) InputSplit(org.apache.hadoop.mapreduce.InputSplit) FileAttribute(java.nio.file.attribute.FileAttribute) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID)78 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)35 Test (org.junit.Test)34 Configuration (org.apache.hadoop.conf.Configuration)28 Path (org.apache.hadoop.fs.Path)25 TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl)22 IOException (java.io.IOException)19 JobID (org.apache.hadoop.mapreduce.JobID)16 TaskID (org.apache.hadoop.mapreduce.TaskID)15 File (java.io.File)14 Job (org.apache.hadoop.mapreduce.Job)14 ArrayList (java.util.ArrayList)13 JobContext (org.apache.hadoop.mapreduce.JobContext)12 LongWritable (org.apache.hadoop.io.LongWritable)11 InputSplit (org.apache.hadoop.mapreduce.InputSplit)10 OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter)10 FileSystem (org.apache.hadoop.fs.FileSystem)9 TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo)8 JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl)8 HashMap (java.util.HashMap)7