Use of org.apache.hadoop.mapreduce.lib.output.FileOutputFormat in project spark-dataflow by cloudera.
The class HadoopFileFormatPipelineTest, method testSequenceFile:
@Test
public void testSequenceFile() throws Exception {
  populateFile();
  Pipeline p = Pipeline.create(PipelineOptionsFactory.create());
  @SuppressWarnings("unchecked")
  Class<? extends FileInputFormat<IntWritable, Text>> inputFormatClass =
      (Class<? extends FileInputFormat<IntWritable, Text>>) (Class<?>) SequenceFileInputFormat.class;
  HadoopIO.Read.Bound<IntWritable, Text> read =
      HadoopIO.Read.from(inputFile.getAbsolutePath(), inputFormatClass, IntWritable.class, Text.class);
  PCollection<KV<IntWritable, Text>> input = p.apply(read);
  @SuppressWarnings("unchecked")
  Class<? extends FileOutputFormat<IntWritable, Text>> outputFormatClass =
      (Class<? extends FileOutputFormat<IntWritable, Text>>) (Class<?>) TemplatedSequenceFileOutputFormat.class;
  @SuppressWarnings("unchecked")
  HadoopIO.Write.Bound<IntWritable, Text> write =
      HadoopIO.Write.to(outputFile.getAbsolutePath(), outputFormatClass, IntWritable.class, Text.class);
  input.apply(write.withoutSharding());
  EvaluationResult res = SparkPipelineRunner.create().run(p);
  res.close();
  IntWritable key = new IntWritable();
  Text value = new Text();
  try (Reader reader = new Reader(new Configuration(), Reader.file(new Path(outputFile.toURI())))) {
    int i = 0;
    while (reader.next(key, value)) {
      assertEquals(i, key.get());
      assertEquals("value-" + i, value.toString());
      i++;
    }
  }
}
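The test relies on a populateFile() helper that seeds inputFile before the pipeline runs; that helper is not shown above. Judging from the assertions (key i paired with value "value-" + i), a minimal sketch could look like the following. The record count of 5 and the reuse of the test's inputFile field are assumptions, not taken from the project.

// Hypothetical sketch of populateFile(): writes a SequenceFile of IntWritable/Text pairs
// so that record i carries key i and value "value-" + i, matching the assertions above.
// The count of 5 records and the inputFile field are assumptions.
private void populateFile() throws IOException {
  IntWritable key = new IntWritable();
  Text value = new Text();
  try (SequenceFile.Writer writer = SequenceFile.createWriter(
      new Configuration(),
      SequenceFile.Writer.file(new Path(inputFile.toURI())),
      SequenceFile.Writer.keyClass(IntWritable.class),
      SequenceFile.Writer.valueClass(Text.class))) {
    for (int i = 0; i < 5; i++) {
      key.set(i);
      value.set("value-" + i);
      writer.append(key, value);
    }
  }
}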
Use of org.apache.hadoop.mapreduce.lib.output.FileOutputFormat in project goldenorb by jzachr.
The class OrbPartition, method dumpData:
private void dumpData() {
  Configuration conf = new Configuration();
  Job job = null;
  JobContext jobContext = null;
  TaskAttemptContext tao = null;
  RecordWriter rw;
  VertexWriter vw;
  FileOutputFormat outputFormat;
  boolean tryAgain = true;
  int count = 0;
  // Attempt the dump up to 15 times; any failure below sets tryAgain and loops again.
  while (tryAgain && count < 15)
    try {
      count++;
      tryAgain = false;
      if (job == null) {
        job = new Job(conf);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job,
            new Path(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath()));
      }
      if (jobContext == null) {
        jobContext = new JobContext(job.getConfiguration(), new JobID());
      }
      System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));
      tao = new TaskAttemptContext(jobContext.getConfiguration(),
          new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
      outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
      rw = outputFormat.getRecordWriter(tao);
      vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
      // Write every vertex in this partition through the configured VertexWriter.
      for (Vertex v : vertices.values()) {
        OrbContext oc = vw.vertexWrite(v);
        rw.write(oc.getKey(), oc.getValue());
        // orbLogger.info("Partition: " + Integer.toString(partitionId) + "writing: " +
        // oc.getKey().toString() + ", " + oc.getValue().toString());
      }
      rw.close(tao);
      // Commit the task output so the files move out of the temporary attempt directory.
      FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
      if (cm.needsTaskCommit(tao)) {
        cm.commitTask(tao);
        cm.cleanupJob(jobContext);
      } else {
        cm.cleanupJob(jobContext);
        tryAgain = true;
      }
    } catch (IOException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (InstantiationException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (IllegalAccessException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (ClassNotFoundException e) {
      tryAgain = true;
      e.printStackTrace();
    } catch (InterruptedException e) {
      tryAgain = true;
      e.printStackTrace();
    }
  // If the dump still failed after all attempts, pause briefly before returning.
  if (tryAgain) {
    synchronized (this) {
      try {
        wait(1000);
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }
  }
}
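A note on API versions: the direct new JobContext(...) and new TaskAttemptContext(...) calls above only compile against Hadoop 1.x, where those types are concrete classes. On Hadoop 2.x they are interfaces, and the equivalent setup would go through the implementation classes instead. The following is a minimal sketch under that assumption; the ContextFactory class name, the explicit partitionId parameter, and the use of TaskType.MAP in place of the deprecated boolean constructor are illustrative choices, not part of the goldenorb code.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

// Hypothetical helper: builds the job and task-attempt contexts that dumpData() needs,
// using the Hadoop 2.x implementation classes rather than the Hadoop 1.x constructors.
public class ContextFactory {

  static JobContext newJobContext(Configuration conf) {
    return new JobContextImpl(conf, new JobID());
  }

  static TaskAttemptContext newTaskAttemptContext(JobContext jobContext, int partitionId) {
    TaskID taskId = new TaskID(jobContext.getJobID(), TaskType.MAP, partitionId);
    return new TaskAttemptContextImpl(jobContext.getConfiguration(), new TaskAttemptID(taskId, 0));
  }
}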