Use of org.apache.hadoop.mapreduce.JobID in project beam by apache.
Class HadoopFormatIOSequenceFileTest, method extractResultsFromFile.
private Stream<KV<Text, LongWritable>> extractResultsFromFile(String fileName) {
  try (SequenceFileRecordReader<Text, LongWritable> reader = new SequenceFileRecordReader<>()) {
    Path path = new Path(fileName);
    TaskAttemptContext taskContext =
        HadoopFormats.createTaskAttemptContext(new Configuration(), new JobID("readJob", 0), 0);
    reader.initialize(
        new FileSplit(path, 0L, Long.MAX_VALUE, new String[] {"localhost"}), taskContext);
    List<KV<Text, LongWritable>> result = new ArrayList<>();
    while (reader.nextKeyValue()) {
      result.add(
          KV.of(
              new Text(reader.getCurrentKey().toString()),
              new LongWritable(reader.getCurrentValue().get())));
    }
    return result.stream();
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
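For reference, a task attempt context like the one returned by HadoopFormats.createTaskAttemptContext can also be built directly from a JobID with the standard mapreduce classes. This is only a sketch of that pattern (the Beam helper may do more internally); the "readJob" identifier and indices simply mirror the test above:

// Fragment sketch; TaskAttemptContextImpl lives in org.apache.hadoop.mapreduce.task,
// TaskType in org.apache.hadoop.mapreduce.
Configuration conf = new Configuration();
JobID jobId = new JobID("readJob", 0);
TaskAttemptID attemptId = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 0), 0);
TaskAttemptContext taskContext = new TaskAttemptContextImpl(conf, attemptId);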
Use of org.apache.hadoop.mapreduce.JobID in project angel by Tencent.
Class AngelYarnClient, method startPSServer.
@Override
public void startPSServer() throws AngelException {
  try {
    setUser();
    setLocalAddr();
    Path stagingDir = AngelApps.getStagingDir(conf, userName);
    // 2.get job id
    yarnClient = YarnClient.createYarnClient();
    YarnConfiguration yarnConf = new YarnConfiguration(conf);
    yarnClient.init(yarnConf);
    yarnClient.start();
    YarnClientApplication newApp;
    newApp = yarnClient.createApplication();
    GetNewApplicationResponse newAppResponse = newApp.getNewApplicationResponse();
    appId = newAppResponse.getApplicationId();
    JobID jobId = TypeConverter.fromYarn(appId);
    Path submitJobDir = new Path(stagingDir, appId.toString());
    jtFs = submitJobDir.getFileSystem(conf);
    conf.set("hadoop.http.filter.initializers",
        "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer");
    conf.set(AngelConf.ANGEL_JOB_DIR, submitJobDir.toString());
    conf.set(AngelConf.ANGEL_JOB_ID, jobId.toString());
    setInputDirectory();
    setOutputDirectory();
    // Credentials credentials = new Credentials();
    credentials.addAll(UserGroupInformation.getCurrentUser().getCredentials());
    TokenCache.obtainTokensForNamenodes(credentials, new Path[] {submitJobDir}, conf);
    checkParameters(conf);
    handleDeprecatedParameters(conf);
    // 4.copy resource files to hdfs
    copyAndConfigureFiles(conf, submitJobDir, (short) 10);
    // 5.write configuration to a xml file
    Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);
    TokenCache.cleanUpTokenReferral(conf);
    writeConf(conf, submitJobFile);
    // 6.create am container context
    ApplicationSubmissionContext appContext =
        createApplicationSubmissionContext(conf, submitJobDir, credentials, appId);
    conf.set(AngelConf.ANGEL_JOB_LIBJARS, "");
    // 7.Submit to ResourceManager
    appId = yarnClient.submitApplication(appContext);
    // 8.get app master client
    updateMaster(10 * 60);
    waitForAllPS(conf.getInt(AngelConf.ANGEL_PS_NUMBER, AngelConf.DEFAULT_ANGEL_PS_NUMBER));
    LOG.info("start pss success");
  } catch (Exception x) {
    LOG.error("submit application to yarn failed.", x);
    throw new AngelException(x);
  }
}
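The TypeConverter.fromYarn call above is the standard bridge from a YARN ApplicationId to a mapreduce JobID: the application's cluster timestamp becomes the JobID's jobtracker identifier and the application number becomes the job number. A minimal sketch (the timestamp and id values are made up for illustration):

// org.apache.hadoop.yarn.api.records.ApplicationId -> org.apache.hadoop.mapreduce.JobID
ApplicationId appId = ApplicationId.newInstance(1546300800000L, 42); // illustrative values
JobID jobId = TypeConverter.fromYarn(appId);
// jobId.toString() has the familiar form "job_1546300800000_0042"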
Use of org.apache.hadoop.mapreduce.JobID in project flink by apache.
Class HadoopInputFormatBase, method createInputSplits.
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
  configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);
  JobContext jobContext = new JobContextImpl(configuration, new JobID());
  jobContext.getCredentials().addAll(this.credentials);
  Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
  if (currentUserCreds != null) {
    jobContext.getCredentials().addAll(currentUserCreds);
  }
  List<org.apache.hadoop.mapreduce.InputSplit> splits;
  try {
    splits = this.mapreduceInputFormat.getSplits(jobContext);
  } catch (InterruptedException e) {
    throw new IOException("Could not get Splits.", e);
  }
  HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
  for (int i = 0; i < hadoopInputSplits.length; i++) {
    hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
  }
  return hadoopInputSplits;
}
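The key idea here is that a mapreduce InputFormat can be asked for splits outside any running job by wrapping a Configuration and a synthetic JobID in a JobContextImpl. A standalone sketch of the same pattern (the input directory is a made-up example path, and the fragment omits exception handling):

Configuration conf = new Configuration();
conf.set("mapreduce.input.fileinputformat.inputdir", "/tmp/example-input"); // illustrative path
JobContext ctx = new JobContextImpl(conf, new JobID("offline", 1));
List<InputSplit> splits = new TextInputFormat().getSplits(ctx);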
Use of org.apache.hadoop.mapreduce.JobID in project flink by apache.
Class HadoopOutputFormatBase, method open.
/**
 * Creates the temporary output file for the Hadoop RecordWriter.
 *
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
  // enforce sequential open() calls
  synchronized (OPEN_MUTEX) {
    if (Integer.toString(taskNumber + 1).length() > 6) {
      throw new IOException("Task id too large.");
    }
    this.taskNumber = taskNumber + 1;
    // for hadoop 2.2
    this.configuration.set("mapreduce.output.basename", "tmp");
    TaskAttemptID taskAttemptID =
        TaskAttemptID.forName(
            "attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ")
                    .replace(" ", "0")
                + Integer.toString(taskNumber + 1)
                + "_0");
    this.configuration.set("mapred.task.id", taskAttemptID.toString());
    this.configuration.setInt("mapred.task.partition", taskNumber + 1);
    // for hadoop 2.2
    this.configuration.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.configuration.setInt("mapreduce.task.partition", taskNumber + 1);
    try {
      this.context = new TaskAttemptContextImpl(this.configuration, taskAttemptID);
      this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(this.context);
      this.outputCommitter.setupJob(new JobContextImpl(this.configuration, new JobID()));
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    this.context.getCredentials().addAll(this.credentials);
    Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
    if (currentUserCreds != null) {
      this.context.getCredentials().addAll(currentUserCreds);
    }
    // 1.2.1
    if (outputCommitter instanceof FileOutputCommitter) {
      this.configuration.set(
          "mapreduce.task.output.dir",
          ((FileOutputCommitter) this.outputCommitter).getWorkPath().toString());
    }
    try {
      this.recordWriter = this.mapreduceOutputFormat.getRecordWriter(this.context);
    } catch (InterruptedException e) {
      throw new IOException("Could not create RecordWriter.", e);
    }
  }
}
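The padded string passed to TaskAttemptID.forName above zero-pads the task number to six digits; the same attempt id can be written more directly with a single format specifier. A sketch for taskNumber == 3:

int taskNumber = 3;
TaskAttemptID id = TaskAttemptID.forName(String.format("attempt__0000_r_%06d_0", taskNumber + 1));
// id.toString() -> "attempt__0000_r_000004_0"
// id.getJobID() -> "job__0000", matching the no-arg new JobID() passed to setupJob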
Use of org.apache.hadoop.mapreduce.JobID in project goldenorb by jzachr.
Class OrbPartition, method dumpData.
private void dumpData() {
  Configuration conf = new Configuration();
  Job job = null;
  JobContext jobContext = null;
  TaskAttemptContext tao = null;
  RecordWriter rw;
  VertexWriter vw;
  FileOutputFormat outputFormat;
  boolean tryAgain = true;
  int count = 0;
  while (tryAgain && count < 15)
    try {
      count++;
      tryAgain = false;
      if (job == null) {
        job = new Job(conf);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(
            job,
            new Path(new String(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath())));
      }
      if (jobContext == null) {
        jobContext = new JobContext(job.getConfiguration(), new JobID());
      }
      System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));
      tao = new TaskAttemptContext(
          jobContext.getConfiguration(),
          new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
      outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
      rw = outputFormat.getRecordWriter(tao);
      vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
      for (Vertex v : vertices.values()) {
        OrbContext oc = vw.vertexWrite(v);
        rw.write(oc.getKey(), oc.getValue());
        // orbLogger.info("Partition: " + Integer.toString(partitionId) + "writing: " +
        // oc.getKey().toString() + ", " + oc.getValue().toString());
      }
      rw.close(tao);
      FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
      if (cm.needsTaskCommit(tao)) {
        cm.commitTask(tao);
        cm.cleanupJob(jobContext);
      } else {
        cm.cleanupJob(jobContext);
        tryAgain = true;
      }
    } catch (IOException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (InstantiationException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (IllegalAccessException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (ClassNotFoundException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (InterruptedException e) {
      tryAgain = true;
      e.printStackTrace();
    }
  if (tryAgain) {
    synchronized (this) {
      try {
        wait(1000);
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }
  }
}
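This snippet instantiates JobContext and TaskAttemptContext directly and builds a TaskID with the old boolean isMap constructor, which dates from the early mapreduce API. A sketch of the newer equivalents, assuming Hadoop 2.x where the contexts are interfaces backed by JobContextImpl and TaskAttemptContextImpl and TaskID takes a TaskType (the "orb" identifier and indices are illustrative):

Configuration conf = new Configuration();
JobID jobId = new JobID("orb", 0);                              // illustrative identifier
TaskID taskId = new TaskID(jobId, TaskType.MAP, 5);             // was: new TaskID(jobId, true, 5)
TaskAttemptContext tao = new TaskAttemptContextImpl(conf, new TaskAttemptID(taskId, 0));
JobContext jobContext = new JobContextImpl(conf, jobId);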