
Example 71 with JobID

Use of org.apache.hadoop.mapreduce.JobID in project beam by apache.

The class HadoopFormatIOSequenceFileTest, method extractResultsFromFile.

private Stream<KV<Text, LongWritable>> extractResultsFromFile(String fileName) {
    try (SequenceFileRecordReader<Text, LongWritable> reader = new SequenceFileRecordReader<>()) {
        Path path = new Path(fileName);
        // A synthetic JobID ("readJob", 0) is enough to build a TaskAttemptContext for reading outside of a running job.
        TaskAttemptContext taskContext = HadoopFormats.createTaskAttemptContext(new Configuration(), new JobID("readJob", 0), 0);
        reader.initialize(new FileSplit(path, 0L, Long.MAX_VALUE, new String[] { "localhost" }), taskContext);
        List<KV<Text, LongWritable>> result = new ArrayList<>();
        while (reader.nextKeyValue()) {
            result.add(KV.of(new Text(reader.getCurrentKey().toString()), new LongWritable(reader.getCurrentValue().get())));
        }
        return result.stream();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), SequenceFileRecordReader (org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader), Text (org.apache.hadoop.io.Text), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), KV (org.apache.beam.sdk.values.KV), FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit), LongWritable (org.apache.hadoop.io.LongWritable), JobID (org.apache.hadoop.mapreduce.JobID)
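The helper HadoopFormats.createTaskAttemptContext is Beam-internal and its body is not shown above. A minimal, hedged sketch of what such a helper could look like using only the standard mapreduce classes (the method name and the choice of TaskType.MAP are illustrative assumptions, not Beam's actual implementation):

// Assumed sketch; uses org.apache.hadoop.mapreduce.{JobID, TaskID, TaskType, TaskAttemptID}
// and org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl.
private static TaskAttemptContext createTaskAttemptContext(Configuration conf, JobID jobId, int attempt) {
    // A synthetic task and attempt id are enough to drive a RecordReader outside a running MapReduce job.
    TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
    return new TaskAttemptContextImpl(conf, new TaskAttemptID(taskId, attempt));
}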

Example 72 with JobID

Use of org.apache.hadoop.mapreduce.JobID in project angel by Tencent.

The class AngelYarnClient, method startPSServer.

@Override
public void startPSServer() throws AngelException {
    try {
        setUser();
        setLocalAddr();
        Path stagingDir = AngelApps.getStagingDir(conf, userName);
        // 2.get job id
        yarnClient = YarnClient.createYarnClient();
        YarnConfiguration yarnConf = new YarnConfiguration(conf);
        yarnClient.init(yarnConf);
        yarnClient.start();
        YarnClientApplication newApp = yarnClient.createApplication();
        GetNewApplicationResponse newAppResponse = newApp.getNewApplicationResponse();
        appId = newAppResponse.getApplicationId();
        JobID jobId = TypeConverter.fromYarn(appId);
        Path submitJobDir = new Path(stagingDir, appId.toString());
        jtFs = submitJobDir.getFileSystem(conf);
        conf.set("hadoop.http.filter.initializers", "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer");
        conf.set(AngelConf.ANGEL_JOB_DIR, submitJobDir.toString());
        conf.set(AngelConf.ANGEL_JOB_ID, jobId.toString());
        setInputDirectory();
        setOutputDirectory();
        // Credentials credentials = new Credentials();
        credentials.addAll(UserGroupInformation.getCurrentUser().getCredentials());
        TokenCache.obtainTokensForNamenodes(credentials, new Path[] { submitJobDir }, conf);
        checkParameters(conf);
        handleDeprecatedParameters(conf);
        // 4.copy resource files to hdfs
        copyAndConfigureFiles(conf, submitJobDir, (short) 10);
        // 5.write configuration to a xml file
        Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);
        TokenCache.cleanUpTokenReferral(conf);
        writeConf(conf, submitJobFile);
        // 6.create am container context
        ApplicationSubmissionContext appContext = createApplicationSubmissionContext(conf, submitJobDir, credentials, appId);
        conf.set(AngelConf.ANGEL_JOB_LIBJARS, "");
        // 7.Submit to ResourceManager
        appId = yarnClient.submitApplication(appContext);
        // 8.get app master client
        updateMaster(10 * 60);
        waitForAllPS(conf.getInt(AngelConf.ANGEL_PS_NUMBER, AngelConf.DEFAULT_ANGEL_PS_NUMBER));
        LOG.info("start pss success");
    } catch (Exception x) {
        LOG.error("submit application to yarn failed.", x);
        throw new AngelException(x);
    }
}
Also used: AngelException (com.tencent.angel.exception.AngelException), GetNewApplicationResponse (org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse), YarnClientApplication (org.apache.hadoop.yarn.client.api.YarnClientApplication), YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration), JobID (org.apache.hadoop.mapreduce.JobID), ServiceException (com.google.protobuf.ServiceException), URISyntaxException (java.net.URISyntaxException), YarnException (org.apache.hadoop.yarn.exceptions.YarnException), IOException (java.io.IOException), UnknownHostException (java.net.UnknownHostException)
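The JobID here is not constructed directly; TypeConverter.fromYarn derives it from the YARN ApplicationId, so the job id and the application id carry the same cluster timestamp and sequence number. A standalone sketch of that conversion (the timestamp and id below are placeholder values):

// Uses org.apache.hadoop.mapreduce.TypeConverter and org.apache.hadoop.yarn.api.records.ApplicationId.
ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
JobID jobId = TypeConverter.fromYarn(appId);
// e.g. application_1700000000000_0001 maps to job_1700000000000_0001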

Example 73 with JobID

Use of org.apache.hadoop.mapreduce.JobID in project flink by apache.

The class HadoopInputFormatBase, method createInputSplits.

@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);
    // A fresh, synthetic JobID is enough here: getSplits only needs the configuration and credentials carried by the context.
    JobContext jobContext = new JobContextImpl(configuration, new JobID());
    jobContext.getCredentials().addAll(this.credentials);
    Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
    if (currentUserCreds != null) {
        jobContext.getCredentials().addAll(currentUserCreds);
    }
    List<org.apache.hadoop.mapreduce.InputSplit> splits;
    try {
        splits = this.mapreduceInputFormat.getSplits(jobContext);
    } catch (InterruptedException e) {
        throw new IOException("Could not get Splits.", e);
    }
    HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
    for (int i = 0; i < hadoopInputSplits.length; i++) {
        hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
    }
    return hadoopInputSplits;
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), HadoopInputSplit (org.apache.flink.api.java.hadoop.mapreduce.wrapper.HadoopInputSplit), IOException (java.io.IOException), JobContext (org.apache.hadoop.mapreduce.JobContext), JobID (org.apache.hadoop.mapreduce.JobID), Credentials (org.apache.hadoop.security.Credentials)
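A throwaway JobID plus a JobContextImpl is all an InputFormat needs to compute splits outside a running job. A minimal standalone sketch of the same pattern with a stock TextInputFormat (the input directory is a placeholder; the configuration key is the one read by FileInputFormat on Hadoop 2.x):

// Uses org.apache.hadoop.mapreduce.{InputSplit, JobContext, JobID}, org.apache.hadoop.mapreduce.task.JobContextImpl,
// org.apache.hadoop.mapreduce.lib.input.TextInputFormat and java.util.List.
private static List<InputSplit> computeSplits(String inputDir) throws IOException {
    Configuration conf = new Configuration();
    // placeholder input path; "mapreduce.input.fileinputformat.inputdir" is the key FileInputFormat reads
    conf.set("mapreduce.input.fileinputformat.inputdir", inputDir);
    // the JobID value itself is irrelevant for split computation; it only has to be present
    JobContext jobContext = new JobContextImpl(conf, new JobID());
    return new TextInputFormat().getSplits(jobContext);
}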

Example 74 with JobID

Use of org.apache.hadoop.mapreduce.JobID in project flink by apache.

The class HadoopOutputFormatBase, method open.

/**
 * Creates the temporary output file for the Hadoop RecordWriter.
 *
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }
        this.taskNumber = taskNumber + 1;
        // for hadoop 2.2
        this.configuration.set("mapreduce.output.basename", "tmp");
        // Builds an id of the form attempt__0000_r_NNNNNN_0, zero-padding taskNumber + 1 to six digits.
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_" + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0") + Integer.toString(taskNumber + 1) + "_0");
        this.configuration.set("mapred.task.id", taskAttemptID.toString());
        this.configuration.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.configuration.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.configuration.setInt("mapreduce.task.partition", taskNumber + 1);
        try {
            this.context = new TaskAttemptContextImpl(this.configuration, taskAttemptID);
            this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(this.context);
            this.outputCommitter.setupJob(new JobContextImpl(this.configuration, new JobID()));
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        this.context.getCredentials().addAll(this.credentials);
        Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
        if (currentUserCreds != null) {
            this.context.getCredentials().addAll(currentUserCreds);
        }
        // 1.2.1
        if (outputCommitter instanceof FileOutputCommitter) {
            this.configuration.set("mapreduce.task.output.dir", ((FileOutputCommitter) this.outputCommitter).getWorkPath().toString());
        }
        try {
            this.recordWriter = this.mapreduceOutputFormat.getRecordWriter(this.context);
        } catch (InterruptedException e) {
            throw new IOException("Could not create RecordWriter.", e);
        }
    }
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), FileOutputCommitter (org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter), IOException (java.io.IOException), JobID (org.apache.hadoop.mapreduce.JobID), Credentials (org.apache.hadoop.security.Credentials)
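The TaskAttemptID.forName call above assembles the string attempt__0000_r_<zero-padded task number>_0 by hand. An arguably clearer way to build an equivalent id through the typed constructors, shown here only as a sketch (not Flink's code; the task number is a placeholder):

int nextTaskNumber = 4; // placeholder for taskNumber + 1
// Should render as "attempt__0000_r_000004_0": empty job tracker identifier, job 0, reduce task, attempt 0.
TaskAttemptID attemptId = new TaskAttemptID(
    new TaskID(new JobID("", 0), TaskType.REDUCE, nextTaskNumber), 0);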

Example 75 with JobID

Use of org.apache.hadoop.mapreduce.JobID in project goldenorb by jzachr.

The class OrbPartition, method dumpData.

private void dumpData() {
    Configuration conf = new Configuration();
    Job job = null;
    JobContext jobContext = null;
    TaskAttemptContext tao = null;
    RecordWriter rw;
    VertexWriter vw;
    FileOutputFormat outputFormat;
    boolean tryAgain = true;
    int count = 0;
    while (tryAgain && count < 15) try {
        count++;
        tryAgain = false;
        if (job == null) {
            job = new Job(conf);
            job.setOutputFormatClass(TextOutputFormat.class);
            FileOutputFormat.setOutputPath(job, new Path(new String(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath())));
        }
        if (jobContext == null) {
            // Older mapreduce API: JobContext is still a concrete class with a public constructor here.
            jobContext = new JobContext(job.getConfiguration(), new JobID());
        }
        System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));
        tao = new TaskAttemptContext(jobContext.getConfiguration(), new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
        outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
        rw = outputFormat.getRecordWriter(tao);
        vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
        for (Vertex v : vertices.values()) {
            OrbContext oc = vw.vertexWrite(v);
            rw.write(oc.getKey(), oc.getValue());
        // orbLogger.info("Partition: " + Integer.toString(partitionId) + "writing: " +
        // oc.getKey().toString() + ", " + oc.getValue().toString());
        }
        rw.close(tao);
        FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
        if (cm.needsTaskCommit(tao)) {
            cm.commitTask(tao);
            cm.cleanupJob(jobContext);
        } else {
            cm.cleanupJob(jobContext);
            tryAgain = true;
        }
    } catch (IOException e) {
        tryAgain = true;
        e.printStackTrace();
    } catch (InstantiationException e) {
        tryAgain = true;
        e.printStackTrace();
    } catch (IllegalAccessException e) {
        tryAgain = true;
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        tryAgain = true;
        e.printStackTrace();
    } catch (InterruptedException e) {
        tryAgain = true;
        e.printStackTrace();
    }
    if (tryAgain) {
        synchronized (this) {
            try {
                wait(1000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
}
Also used: FileOutputFormat (org.apache.hadoop.mapreduce.lib.output.FileOutputFormat), Path (org.apache.hadoop.fs.Path), TaskID (org.apache.hadoop.mapreduce.TaskID), Configuration (org.apache.hadoop.conf.Configuration), OrbConfiguration (org.goldenorb.conf.OrbConfiguration), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), FileOutputCommitter (org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), IOException (java.io.IOException), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), TextOutputFormat (org.apache.hadoop.mapreduce.lib.output.TextOutputFormat), OrbContext (org.goldenorb.io.output.OrbContext), VertexWriter (org.goldenorb.io.output.VertexWriter), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job), JobID (org.apache.hadoop.mapreduce.JobID)
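This example targets the older mapreduce API in which JobContext and TaskAttemptContext are concrete classes with public constructors. On Hadoop 2.x, where both are interfaces, an equivalent construction would go through the *Impl classes, roughly as sketched below (not goldenorb's code; the partition id is a placeholder):

// Uses org.apache.hadoop.mapreduce.task.{JobContextImpl, TaskAttemptContextImpl} and org.apache.hadoop.mapreduce.TaskType.
Configuration conf = new Configuration();
int partitionId = 0; // placeholder for getPartitionID()
JobContext jobContext = new JobContextImpl(conf, new JobID());
TaskAttemptContext tao = new TaskAttemptContextImpl(conf,
    new TaskAttemptID(new TaskID(jobContext.getJobID(), TaskType.MAP, partitionId), 0));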

Aggregations

JobID (org.apache.hadoop.mapreduce.JobID): 79
Test (org.junit.Test): 33
IOException (java.io.IOException): 25
Configuration (org.apache.hadoop.conf.Configuration): 20
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 20
TaskID (org.apache.hadoop.mapreduce.TaskID): 18
JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId): 17
Path (org.apache.hadoop.fs.Path): 14
ArrayList (java.util.ArrayList): 11
Job (org.apache.hadoop.mapreduce.Job): 11
JobConf (org.apache.hadoop.mapred.JobConf): 10
FileSystem (org.apache.hadoop.fs.FileSystem): 9
JobContext (org.apache.hadoop.mapreduce.JobContext): 7
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 7
EventHandler (org.apache.hadoop.yarn.event.EventHandler): 7
HashMap (java.util.HashMap): 6
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 6
TaskAttemptInfo (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo): 6
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 5
Event (org.apache.hadoop.mapreduce.jobhistory.Event): 5