Use of org.apache.hadoop.mapreduce.JobContext in project parquet-mr by apache.
In class PerfTest2, method load:
static void load(String out, int colsToLoad, StringBuilder results) throws Exception {
  StringBuilder schemaString = new StringBuilder("a0: chararray");
  for (int i = 1; i < colsToLoad; i++) {
    schemaString.append(", a" + i + ": chararray");
  }
  long t0 = System.currentTimeMillis();
  Job job = new Job(conf);
  int loadjobId = jobid++;
  LoadFunc loadFunc = new ParquetLoader(schemaString.toString());
  loadFunc.setUDFContextSignature("sigLoader" + loadjobId);
  String absPath = loadFunc.relativeToAbsolutePath(out, new Path(new File(".").getAbsoluteFile().toURI()));
  loadFunc.setLocation(absPath, job);
  // that's how the base class is defined
  @SuppressWarnings("unchecked")
  InputFormat<Void, Tuple> inputFormat = loadFunc.getInputFormat();
  JobContext jobContext = ContextUtil.newJobContext(ContextUtil.getConfiguration(job), new JobID("jt", loadjobId));
  List<InputSplit> splits = inputFormat.getSplits(jobContext);
  int i = 0;
  int taskid = 0;
  for (InputSplit split : splits) {
    TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(ContextUtil.getConfiguration(job),
        new TaskAttemptID("jt", loadjobId, true, taskid++, 0));
    RecordReader<Void, Tuple> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
    loadFunc.prepareToRead(recordReader, null);
    recordReader.initialize(split, taskAttemptContext);
    Tuple t;
    while ((t = loadFunc.getNext()) != null) {
      if (DEBUG)
        System.out.println(t);
      ++i;
    }
  }
  assertEquals(ROW_COUNT, i);
  long t1 = System.currentTimeMillis();
  results.append((t1 - t0) + " ms to read " + colsToLoad + " columns\n");
}
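ContextUtil in the snippet above papers over the API break between Hadoop 1.x (where JobContext is a concrete class) and Hadoop 2.x (where it is an interface backed by JobContextImpl). A minimal sketch, assuming a plain Hadoop 2.x classpath and no parquet-mr shim, of what those calls roughly resolve to; the "jt" identifier and zero ids are illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class Hadoop2ContextSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Rough Hadoop 2.x equivalent of ContextUtil.newJobContext(conf, jobId).
    JobContext jobContext = new JobContextImpl(conf, new JobID("jt", 0));
    // Rough Hadoop 2.x equivalent of ContextUtil.newTaskAttemptContext(conf, taskAttemptId).
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(
        conf, new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0));
    System.out.println(jobContext.getJobID() + " / " + taskContext.getTaskAttemptID());
  }
}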
Use of org.apache.hadoop.mapreduce.JobContext in project cdap by caskdata.
In class DatasetInputFormat, method getSplits:
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
  try (DatasetAccessor datasetAccessor = new DatasetAccessor(jobConf)) {
    try {
      datasetAccessor.initialize();
    } catch (Exception e) {
      throw new IOException("Could not get dataset", e);
    }
    try (RecordScannable recordScannable = datasetAccessor.getDataset()) {
      Job job = new Job(jobConf);
      JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
      Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);
      List<Split> dsSplits = recordScannable.getSplits();
      InputSplit[] inputSplits = new InputSplit[dsSplits.size()];
      for (int i = 0; i < dsSplits.size(); i++) {
        inputSplits[i] = new DatasetInputSplit(dsSplits.get(i), tablePaths[0]);
      }
      return inputSplits;
    }
  }
}
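The Hive ShimLoader is used above because the code has to work across Hadoop versions. As a hedged sketch, assuming a Hadoop 2.x-only setup: Job itself implements JobContext, so a mapred JobConf can be adapted directly without a shim. The class and method names below are illustrative, not part of CDAP:

import java.io.IOException;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;

public class JobConfToJobContextSketch {
  public static JobContext toJobContext(JobConf jobConf) throws IOException {
    // On Hadoop 2.x, Job extends JobContextImpl, so the Job instance
    // can be passed wherever a JobContext is expected.
    return Job.getInstance(jobConf);
  }
}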
Use of org.apache.hadoop.mapreduce.JobContext in project cdap by caskdata.
In class MultipleOutputsCommitter, method commitJob:
@Override
public void commitJob(JobContext jobContext) throws IOException {
  rootOutputcommitter.commitJob(jobContext);
  for (Map.Entry<String, OutputCommitter> committer : committers.entrySet()) {
    JobContext namedJobContext = MultipleOutputs.getNamedJobContext(jobContext, committer.getKey());
    committer.getValue().commitJob(namedJobContext);
  }
}
Use of org.apache.hadoop.mapreduce.JobContext in project cdap by caskdata.
In class MultipleOutputsCommitter, method abortJob:
@Override
public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException {
  rootOutputcommitter.abortJob(jobContext, state);
  for (Map.Entry<String, OutputCommitter> committer : committers.entrySet()) {
    JobContext namedJobContext = MultipleOutputs.getNamedJobContext(jobContext, committer.getKey());
    committer.getValue().abortJob(namedJobContext, state);
  }
}
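Both methods above follow one delegation pattern: act on the root committer first, then on each named output with a JobContext rewritten for that output via MultipleOutputs.getNamedJobContext. A hedged sketch of how the same pattern could extend to job setup; CDAP's actual class covers more of the OutputCommitter lifecycle and may differ in detail:

// Hypothetical extension of the delegation pattern shown above.
@Override
public void setupJob(JobContext jobContext) throws IOException {
  rootOutputcommitter.setupJob(jobContext);
  for (Map.Entry<String, OutputCommitter> committer : committers.entrySet()) {
    JobContext namedJobContext = MultipleOutputs.getNamedJobContext(jobContext, committer.getKey());
    committer.getValue().setupJob(namedJobContext);
  }
}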
Use of org.apache.hadoop.mapreduce.JobContext in project mongo-hadoop by mongodb.
In class GridFSInputFormatTest, method testReadWholeFileNoDelimiter:
@Test
public void testReadWholeFileNoDelimiter() throws IOException, InterruptedException {
  Configuration conf = getConfiguration();
  MongoConfigUtil.setGridFSWholeFileSplit(conf, true);
  JobContext jobContext = mockJobContext(conf);
  List<InputSplit> splits = inputFormat.getSplits(jobContext);
  // Empty delimiter == no delimiter.
  MongoConfigUtil.setGridFSDelimiterPattern(conf, "");
  TaskAttemptContext context = mockTaskAttemptContext(conf);
  assertEquals(1, splits.size());
  String fileText = null;
  for (InputSplit split : splits) {
    GridFSInputFormat.GridFSTextRecordReader reader = new GridFSInputFormat.GridFSTextRecordReader();
    reader.initialize(split, context);
    int i;
    for (i = 0; reader.nextKeyValue(); ++i) {
      fileText = reader.getCurrentValue().toString();
    }
    assertEquals(1, i);
  }
  assertEquals(fileContents.toString(), fileText);
}
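The mockJobContext and mockTaskAttemptContext helpers are not shown in this excerpt. A rough, hypothetical stand-in, assuming Mockito is on the test classpath: only getConfiguration() needs stubbing for getSplits() and the record reader to see the settings applied above.

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical helpers; the real test may build these contexts differently.
static JobContext mockJobContext(Configuration conf) {
  JobContext jobContext = mock(JobContext.class);
  when(jobContext.getConfiguration()).thenReturn(conf);
  return jobContext;
}

static TaskAttemptContext mockTaskAttemptContext(Configuration conf) {
  TaskAttemptContext context = mock(TaskAttemptContext.class);
  when(context.getConfiguration()).thenReturn(conf);
  return context;
}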