
Example 31 with TaskAttemptID

Use of org.apache.hadoop.mapreduce.TaskAttemptID in project jena by apache.

From the class AbstractNodeTupleOutputFormatTests, method testOutput.

/**
     * Tests output
     * 
     * @param f
     *            File to output to
     * @param num
     *            Number of tuples to output
     * @throws IOException
     * @throws InterruptedException
     */
protected final void testOutput(File f, int num) throws IOException, InterruptedException {
    // Prepare configuration
    Configuration config = this.prepareConfiguration();
    // Set up fake job
    OutputFormat<NullWritable, T> outputFormat = this.getOutputFormat();
    Job job = Job.getInstance(config);
    job.setOutputFormatClass(outputFormat.getClass());
    this.addOutputPath(f, job.getConfiguration(), job);
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertNotNull(FileOutputFormat.getOutputPath(context));
    // Output the data
    TaskAttemptID id = new TaskAttemptID("outputTest", 1, TaskType.MAP, 1, 1);
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), id);
    RecordWriter<NullWritable, T> writer = outputFormat.getRecordWriter(taskContext);
    Iterator<T> tuples = this.generateTuples(num);
    while (tuples.hasNext()) {
        writer.write(NullWritable.get(), tuples.next());
    }
    writer.close(taskContext);
    // Check output
    File outputFile = this.findOutputFile(this.folder.getRoot(), context);
    Assert.assertNotNull(outputFile);
    this.checkTuples(outputFile, num);
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), NullWritable (org.apache.hadoop.io.NullWritable), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job), File (java.io.File)
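
For reference, the TaskAttemptID constructor used above takes (jtIdentifier, jobId, type, taskId, attemptId), and the resulting ID round-trips through its canonical attempt_* string form. Below is a minimal standalone sketch of that round trip; the class name is illustrative and not part of the Jena tests.

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIdRoundTrip {

    public static void main(String[] args) {
        // Same constructor as the test above: (jtIdentifier, jobId, type, taskId, attemptId)
        TaskAttemptID id = new TaskAttemptID("outputTest", 1, TaskType.MAP, 1, 1);
        // Canonical string form, e.g. attempt_outputTest_0001_m_000001_1
        System.out.println(id);
        // forName() parses that string back into an equal TaskAttemptID
        TaskAttemptID parsed = TaskAttemptID.forName(id.toString());
        System.out.println(parsed.equals(id));    // true
        System.out.println(parsed.getTaskID());   // enclosing TaskID, e.g. task_outputTest_0001_m_000001
        System.out.println(parsed.getTaskType()); // MAP
    }
}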

Example 32 with TaskAttemptID

Use of org.apache.hadoop.mapreduce.TaskAttemptID in project jena by apache.

From the class AbstractNodeTupleInputFormatTests, method testSingleInput.

/**
     * Runs a test with a single input
     * 
     * @param config
     *            Configuration
     * @param input
     *            Input file
     * @param expectedSplits
     *            Expected number of splits
     * @param expectedTuples
     *            Expected number of tuples
     * @throws IOException
     * @throws InterruptedException
     */
protected final void testSingleInput(Configuration config, File input, int expectedSplits, int expectedTuples) throws IOException, InterruptedException {
    // Set up fake job
    InputFormat<LongWritable, T> inputFormat = this.getInputFormat();
    Job job = Job.getInstance(config);
    job.setInputFormatClass(inputFormat.getClass());
    this.addInputPath(input, job.getConfiguration(), job);
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertEquals(1, FileInputFormat.getInputPaths(context).length);
    NLineInputFormat.setNumLinesPerSplit(job, LARGE_SIZE);
    // Check splits
    List<InputSplit> splits = inputFormat.getSplits(context);
    Assert.assertEquals(expectedSplits, splits.size());
    // Check tuples
    for (InputSplit split : splits) {
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
        RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext);
        reader.initialize(split, taskContext);
        this.checkTuples(reader, expectedTuples);
    }
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), LongWritable (org.apache.hadoop.io.LongWritable), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job), InputSplit (org.apache.hadoop.mapreduce.InputSplit)
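
The fake-context pattern above generalises to any mapreduce InputFormat. The helper below is a minimal sketch of that pattern (the class and method names are illustrative, not part of the Jena test suite); it assumes the input paths and any format-specific settings are already present in the supplied Configuration.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public final class InputFormatDriver {

    /** Counts the records produced by an InputFormat without running a real MapReduce job. */
    public static <K, V> long countRecords(InputFormat<K, V> inputFormat, Configuration conf)
            throws IOException, InterruptedException {
        Job job = Job.getInstance(conf);
        JobContext jobContext = new JobContextImpl(job.getConfiguration(), job.getJobID());
        long count = 0;
        for (InputSplit split : inputFormat.getSplits(jobContext)) {
            // A default TaskAttemptID is sufficient for a fake task attempt context
            TaskAttemptContext taskContext =
                new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader<K, V> reader = inputFormat.createRecordReader(split, taskContext);
            try {
                reader.initialize(split, taskContext);
                while (reader.nextKeyValue()) {
                    count++;
                }
            } finally {
                reader.close();
            }
        }
        return count;
    }
}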

Example 33 with TaskAttemptID

Use of org.apache.hadoop.mapreduce.TaskAttemptID in project cdap by caskdata.

From the class MapReduceTaskContextProvider, method createCacheLoader.

/**
   * Creates a {@link CacheLoader} for the task context cache.
   */
private CacheLoader<ContextCacheKey, BasicMapReduceTaskContext> createCacheLoader(final Injector injector) {
    final DiscoveryServiceClient discoveryServiceClient = injector.getInstance(DiscoveryServiceClient.class);
    final DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
    final SecureStore secureStore = injector.getInstance(SecureStore.class);
    final SecureStoreManager secureStoreManager = injector.getInstance(SecureStoreManager.class);
    final MessagingService messagingService = injector.getInstance(MessagingService.class);
    // Multiple instances of BasicMapReduceTaskContext can share the same program.
    final AtomicReference<Program> programRef = new AtomicReference<>();
    return new CacheLoader<ContextCacheKey, BasicMapReduceTaskContext>() {

        @Override
        public BasicMapReduceTaskContext load(ContextCacheKey key) throws Exception {
            MapReduceContextConfig contextConfig = new MapReduceContextConfig(key.getConfiguration());
            MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(key.getConfiguration());
            Program program = programRef.get();
            if (program == null) {
                // Creating the program is relatively cheap, so just create it and do a compare-and-set.
                programRef.compareAndSet(null, createProgram(contextConfig, classLoader.getProgramClassLoader()));
                program = programRef.get();
            }
            WorkflowProgramInfo workflowInfo = contextConfig.getWorkflowProgramInfo();
            DatasetFramework programDatasetFramework = workflowInfo == null ? datasetFramework : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, program.getApplicationSpecification());
            // Setup dataset framework context, if required
            if (programDatasetFramework instanceof ProgramContextAware) {
                ProgramRunId programRunId = program.getId().run(ProgramRunners.getRunId(contextConfig.getProgramOptions()));
                ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programRunId));
            }
            MapReduceSpecification spec = program.getApplicationSpecification().getMapReduce().get(program.getName());
            MetricsCollectionService metricsCollectionService = null;
            MapReduceMetrics.TaskType taskType = null;
            String taskId = null;
            TaskAttemptID taskAttemptId = key.getTaskAttemptID();
            // the task attempt ID can be null, e.g. when the context is created from a org.apache.hadoop.io.RawComparator
            if (taskAttemptId != null) {
                taskId = taskAttemptId.getTaskID().toString();
                if (MapReduceMetrics.TaskType.hasType(taskAttemptId.getTaskType())) {
                    taskType = MapReduceMetrics.TaskType.from(taskAttemptId.getTaskType());
                    // if this is not for a mapper or a reducer, we don't need the metrics collection service
                    metricsCollectionService = injector.getInstance(MetricsCollectionService.class);
                }
            }
            CConfiguration cConf = injector.getInstance(CConfiguration.class);
            TransactionSystemClient txClient = injector.getInstance(TransactionSystemClient.class);
            return new BasicMapReduceTaskContext(program, contextConfig.getProgramOptions(), cConf, taskType, taskId, spec, workflowInfo, discoveryServiceClient, metricsCollectionService, txClient, contextConfig.getTx(), programDatasetFramework, classLoader.getPluginInstantiator(), contextConfig.getLocalizedResources(), secureStore, secureStoreManager, authorizationEnforcer, authenticationContext, messagingService);
        }
    };
}
Also used: DiscoveryServiceClient (org.apache.twill.discovery.DiscoveryServiceClient), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), DatasetFramework (co.cask.cdap.data2.dataset2.DatasetFramework), NameMappedDatasetFramework (co.cask.cdap.internal.app.runtime.workflow.NameMappedDatasetFramework), TransactionSystemClient (org.apache.tephra.TransactionSystemClient), SecureStoreManager (co.cask.cdap.api.security.store.SecureStoreManager), MapReduceMetrics (co.cask.cdap.app.metrics.MapReduceMetrics), Program (co.cask.cdap.app.program.Program), DefaultProgram (co.cask.cdap.app.program.DefaultProgram), MetricsCollectionService (co.cask.cdap.api.metrics.MetricsCollectionService), MapReduceSpecification (co.cask.cdap.api.mapreduce.MapReduceSpecification), AtomicReference (java.util.concurrent.atomic.AtomicReference), BasicProgramContext (co.cask.cdap.internal.app.runtime.BasicProgramContext), SecureStore (co.cask.cdap.api.security.store.SecureStore), CConfiguration (co.cask.cdap.common.conf.CConfiguration), MessagingService (co.cask.cdap.messaging.MessagingService), WorkflowProgramInfo (co.cask.cdap.internal.app.runtime.workflow.WorkflowProgramInfo), CacheLoader (com.google.common.cache.CacheLoader), ProgramRunId (co.cask.cdap.proto.id.ProgramRunId), ProgramContextAware (co.cask.cdap.data.ProgramContextAware)
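
In a task process, Hadoop records the attempt ID in the job Configuration under mapreduce.task.attempt.id (MRJobConfig.TASK_ATTEMPT_ID). Whether ContextCacheKey obtains its TaskAttemptID that way is not shown here, so the sketch below is only an assumption-laden illustration of the null and task-type handling above, not CDAP code.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public final class TaskAttemptIds {

    /** Returns the attempt ID recorded in the configuration, or null if none is set. */
    public static TaskAttemptID fromConfiguration(Configuration conf) {
        // forName() returns null when the property is absent
        return TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));
    }

    /** True only for MAP and REDUCE attempts, mirroring the type check above. */
    public static boolean isMapOrReduce(TaskAttemptID id) {
        return id != null
            && (id.getTaskType() == TaskType.MAP || id.getTaskType() == TaskType.REDUCE);
    }
}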

Example 34 with TaskAttemptID

Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hive by apache.

From the class TestRCFileMapReduceInputFormat, method writeThenReadByRecordReader.

private void writeThenReadByRecordReader(int intervalRecordCount, int writeCount, int splitNumber, long maxSplitSize, CompressionCodec codec) throws IOException, InterruptedException {
    Path testDir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred/testsmallfirstsplit");
    Path testFile = new Path(testDir, "test_rcfile");
    fs.delete(testFile, true);
    Configuration cloneConf = new Configuration(conf);
    RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
    cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);
    RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
    for (int i = 0; i < bytesArray.length; i++) {
        BytesRefWritable cu = null;
        cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length);
        bytes.set(i, cu);
    }
    for (int i = 0; i < writeCount; i++) {
        writer.append(bytes);
    }
    writer.close();
    RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable> inputFormat = new RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable>();
    Configuration jonconf = new Configuration(cloneConf);
    jonconf.set("mapred.input.dir", testDir.toString());
    JobContext context = new Job(jonconf);
    HiveConf.setLongVar(context.getConfiguration(), HiveConf.ConfVars.MAPREDMAXSPLITSIZE, maxSplitSize);
    List<InputSplit> splits = inputFormat.getSplits(context);
    assertEquals("splits length should be " + splitNumber, splitNumber, splits.size());
    int readCount = 0;
    for (int i = 0; i < splits.size(); i++) {
        TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(jonconf, new TaskAttemptID());
        RecordReader<LongWritable, BytesRefArrayWritable> rr = inputFormat.createRecordReader(splits.get(i), tac);
        rr.initialize(splits.get(i), tac);
        while (rr.nextKeyValue()) {
            readCount++;
        }
    }
    assertEquals("readCount should be equal to writeCount", readCount, writeCount);
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), RCFile (org.apache.hadoop.hive.ql.io.RCFile), LongWritable (org.apache.hadoop.io.LongWritable), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job), InputSplit (org.apache.hadoop.mapreduce.InputSplit), BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)

Example 35 with TaskAttemptID

Use of org.apache.hadoop.mapreduce.TaskAttemptID in project hive by apache.

From the class LlapBaseInputFormat, method getRecordReader.

@SuppressWarnings("unchecked")
@Override
public RecordReader<NullWritable, V> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
    LlapInputSplit llapSplit = (LlapInputSplit) split;
    // Set conf to use LLAP user rather than current user for LLAP Zk registry.
    HiveConf.setVar(job, HiveConf.ConfVars.LLAP_ZK_REGISTRY_USER, llapSplit.getLlapUser());
    SubmitWorkInfo submitWorkInfo = SubmitWorkInfo.fromBytes(llapSplit.getPlanBytes());
    LlapServiceInstance serviceInstance = getServiceInstance(job, llapSplit);
    String host = serviceInstance.getHost();
    int llapSubmitPort = serviceInstance.getRpcPort();
    LOG.info("Found service instance for host " + host + " with rpc port " + llapSubmitPort + " and outputformat port " + serviceInstance.getOutputFormatPort());
    byte[] llapTokenBytes = llapSplit.getTokenBytes();
    Token<LlapTokenIdentifier> llapToken = null;
    if (llapTokenBytes != null) {
        DataInputBuffer in = new DataInputBuffer();
        in.reset(llapTokenBytes, 0, llapTokenBytes.length);
        llapToken = new Token<LlapTokenIdentifier>();
        llapToken.readFields(in);
    }
    LlapRecordReaderTaskUmbilicalExternalResponder umbilicalResponder = new LlapRecordReaderTaskUmbilicalExternalResponder();
    LlapTaskUmbilicalExternalClient llapClient = new LlapTaskUmbilicalExternalClient(job, submitWorkInfo.getTokenIdentifier(), submitWorkInfo.getToken(), umbilicalResponder, llapToken);
    int attemptNum = 0;
    // Use task attempt number from conf if provided
    TaskAttemptID taskAttemptId = TaskAttemptID.forName(job.get(MRJobConfig.TASK_ATTEMPT_ID));
    if (taskAttemptId != null) {
        attemptNum = taskAttemptId.getId();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Setting attempt number to " + attemptNum + " from task attempt ID in conf: " + job.get(MRJobConfig.TASK_ATTEMPT_ID));
        }
    }
    SubmitWorkRequestProto request = constructSubmitWorkRequestProto(submitWorkInfo, llapSplit.getSplitNum(), attemptNum, llapClient.getAddress(), submitWorkInfo.getToken(), llapSplit.getFragmentBytes(), llapSplit.getFragmentBytesSignature(), job);
    llapClient.submitWork(request, host, llapSubmitPort);
    Socket socket = new Socket(host, serviceInstance.getOutputFormatPort());
    LOG.debug("Socket connected");
    SignableVertexSpec vertex = SignableVertexSpec.parseFrom(submitWorkInfo.getVertexBinary());
    String fragmentId = Converters.createTaskAttemptId(vertex.getQueryIdentifier(), vertex.getVertexIndex(), request.getFragmentNumber(), request.getAttemptNumber()).toString();
    OutputStream socketStream = socket.getOutputStream();
    LlapOutputSocketInitMessage.Builder builder = LlapOutputSocketInitMessage.newBuilder().setFragmentId(fragmentId);
    if (llapSplit.getTokenBytes() != null) {
        builder.setToken(ByteString.copyFrom(llapSplit.getTokenBytes()));
    }
    builder.build().writeDelimitedTo(socketStream);
    socketStream.flush();
    LOG.info("Registered id: " + fragmentId);
    @SuppressWarnings("rawtypes") LlapBaseRecordReader recordReader = new LlapBaseRecordReader(socket.getInputStream(), llapSplit.getSchema(), Text.class, job, llapClient, (java.io.Closeable) socket);
    umbilicalResponder.setRecordReader(recordReader);
    return recordReader;
}
Also used: LlapTokenIdentifier (org.apache.hadoop.hive.llap.security.LlapTokenIdentifier), LlapOutputSocketInitMessage (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.LlapOutputSocketInitMessage), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID), OutputStream (java.io.OutputStream), LlapServiceInstance (org.apache.hadoop.hive.llap.registry.LlapServiceInstance), ByteString (com.google.protobuf.ByteString), LlapTaskUmbilicalExternalClient (org.apache.hadoop.hive.llap.ext.LlapTaskUmbilicalExternalClient), DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), SubmitWorkRequestProto (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto), SignableVertexSpec (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec), Socket (java.net.Socket)

Aggregations

TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 78 usages
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 35 usages
Test (org.junit.Test): 34 usages
Configuration (org.apache.hadoop.conf.Configuration): 28 usages
Path (org.apache.hadoop.fs.Path): 25 usages
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 22 usages
IOException (java.io.IOException): 19 usages
JobID (org.apache.hadoop.mapreduce.JobID): 16 usages
TaskID (org.apache.hadoop.mapreduce.TaskID): 15 usages
File (java.io.File): 14 usages
Job (org.apache.hadoop.mapreduce.Job): 14 usages
ArrayList (java.util.ArrayList): 13 usages
JobContext (org.apache.hadoop.mapreduce.JobContext): 12 usages
LongWritable (org.apache.hadoop.io.LongWritable): 11 usages
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 10 usages
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 10 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 9 usages
TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo): 8 usages
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 8 usages
HashMap (java.util.HashMap): 7 usages