Use of org.apache.hadoop.mapreduce.InputSplit in project flink by apache.
The class HadoopInputFormatTest, method testOpen.
@Test
public void testOpen() throws Exception {
  DummyInputFormat inputFormat = mock(DummyInputFormat.class);
  when(inputFormat.createRecordReader(any(InputSplit.class), any(TaskAttemptContext.class)))
      .thenReturn(new DummyRecordReader());
  HadoopInputSplit inputSplit = mock(HadoopInputSplit.class);
  HadoopInputFormat<String, Long> hadoopInputFormat = setupHadoopInputFormat(inputFormat, Job.getInstance(), null);
  hadoopInputFormat.open(inputSplit);
  verify(inputFormat, times(1)).createRecordReader(any(InputSplit.class), any(TaskAttemptContext.class));
  assertThat(hadoopInputFormat.fetched, is(false));
}
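The test stubs createRecordReader with a DummyRecordReader, a test helper that is not shown here. A minimal sketch of what such a no-op reader could look like against the Hadoop mapreduce API (the class body below is an assumption, not the actual Flink fixture):

  // Hypothetical stand-in for DummyRecordReader: a RecordReader that reports
  // no records, so HadoopInputFormat.open() can complete without real input.
  import org.apache.hadoop.mapreduce.InputSplit;
  import org.apache.hadoop.mapreduce.RecordReader;
  import org.apache.hadoop.mapreduce.TaskAttemptContext;

  public class DummyRecordReader extends RecordReader<String, Long> {

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) {
      // nothing to set up for the dummy reader
    }

    @Override
    public boolean nextKeyValue() {
      // signal "no more records" immediately
      return false;
    }

    @Override
    public String getCurrentKey() {
      return null;
    }

    @Override
    public Long getCurrentValue() {
      return null;
    }

    @Override
    public float getProgress() {
      return 1.0f;
    }

    @Override
    public void close() {
      // no resources to release
    }
  }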
Use of org.apache.hadoop.mapreduce.InputSplit in project hive by apache.
The class DataReaderSlave, method main.
public static void main(String[] args) throws IOException, ClassNotFoundException {
  ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(args[0])));
  ReaderContext cntxt = (ReaderContext) ois.readObject();
  ois.close();
  String[] inpSlitsToRead = args[1].split(",");
  List<InputSplit> splits = cntxt.getSplits();
  for (int i = 0; i < inpSlitsToRead.length; i++) {
    InputSplit split = splits.get(Integer.parseInt(inpSlitsToRead[i]));
    HCatReader reader = DataTransferFactory.getHCatReader(split, cntxt.getConf());
    Iterator<HCatRecord> itr = reader.read();
    File f = new File(args[2] + "-" + i);
    f.delete();
    BufferedWriter outFile = new BufferedWriter(new FileWriter(f));
    while (itr.hasNext()) {
      String rec = itr.next().toString().replaceFirst("\\s+$", "");
      System.err.println(rec);
      outFile.write(rec + "\n");
    }
    outFile.close();
  }
}
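The slave deserializes a ReaderContext that a master process prepared and wrote to disk beforehand. A minimal sketch of that master side, assuming the HCatalog data-transfer API (ReadEntity.Builder, DataTransferFactory.getHCatReader, prepareRead); the database, table, and output file name are placeholders, and older releases use the org.apache.hcatalog package prefix instead:

  import java.io.File;
  import java.io.FileOutputStream;
  import java.io.ObjectOutputStream;
  import java.util.HashMap;
  import org.apache.hive.hcatalog.data.transfer.DataTransferFactory;
  import org.apache.hive.hcatalog.data.transfer.HCatReader;
  import org.apache.hive.hcatalog.data.transfer.ReadEntity;
  import org.apache.hive.hcatalog.data.transfer.ReaderContext;

  public class DataReaderMasterSketch {

    public static void main(String[] args) throws Exception {
      // Describe what to read; "default" and "my_table" are placeholders.
      ReadEntity entity = new ReadEntity.Builder()
          .withDatabase("default")
          .withTable("my_table")
          .build();
      HCatReader masterReader = DataTransferFactory.getHCatReader(entity, new HashMap<String, String>());
      ReaderContext context = masterReader.prepareRead();

      // Serialize the context; DataReaderSlave reads it back with ObjectInputStream.
      ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(new File("reader.context")));
      oos.writeObject(context);
      oos.close();
    }
  }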
Use of org.apache.hadoop.mapreduce.InputSplit in project hive by apache.
The class TestE2EScenarios, method copyTable.
private void copyTable(String in, String out) throws IOException, InterruptedException {
  Job ijob = new Job();
  Job ojob = new Job();
  HCatInputFormat inpy = new HCatInputFormat();
  inpy.setInput(ijob, null, in);
  HCatOutputFormat oupy = new HCatOutputFormat();
  oupy.setOutput(ojob, OutputJobInfo.create(null, out, new HashMap<String, String>()));
  // Test HCatContext
  System.err.println("HCatContext INSTANCE is present : " + HCatContext.INSTANCE.getConf().isPresent());
  if (HCatContext.INSTANCE.getConf().isPresent()) {
    System.err.println("HCatContext tinyint->int promotion says " + HCatContext.INSTANCE.getConf().get().getBoolean(
        HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT));
  }
  HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration());
  System.err.println("Copying from [" + in + "] to [" + out + "] with schema : " + tableSchema.toString());
  oupy.setSchema(ojob, tableSchema);
  oupy.checkOutputSpecs(ojob);
  OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration()));
  oc.setupJob(ojob);
  for (InputSplit split : inpy.getSplits(ijob)) {
    TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration());
    TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration());
    RecordReader<WritableComparable, HCatRecord> rr = inpy.createRecordReader(split, rtaskContext);
    rr.initialize(split, rtaskContext);
    OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext);
    taskOc.setupTask(wtaskContext);
    RecordWriter<WritableComparable<?>, HCatRecord> rw = oupy.getRecordWriter(wtaskContext);
    while (rr.nextKeyValue()) {
      rw.write(rr.getCurrentKey(), rr.getCurrentValue());
    }
    rw.close(wtaskContext);
    taskOc.commitTask(wtaskContext);
    rr.close();
  }
  oc.commitJob(ojob);
}
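copyTable leans on a createTaskAttemptContext helper defined elsewhere in the test class. A minimal sketch of how such a helper could be written against the Hadoop 2 mapreduce API (the actual Hive test may construct the context differently, for example through shim utilities):

  // Hypothetical helper matching the createTaskAttemptContext(...) calls above.
  // Needs org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.TaskAttemptID
  // and org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl.
  private TaskAttemptContext createTaskAttemptContext(Configuration conf) {
    // A fresh TaskAttemptID is enough for this single-process test copy.
    return new TaskAttemptContextImpl(conf, new TaskAttemptID());
  }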
Use of org.apache.hadoop.mapreduce.InputSplit in project hive by apache.
The class ReaderContextImpl, method writeExternal.
@Override
public void writeExternal(ObjectOutput out) throws IOException {
  conf.write(out);
  out.writeInt(splits.size());
  for (InputSplit split : splits) {
    ((HCatSplit) split).write(out);
  }
}
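The matching readExternal would restore the Configuration and then rebuild each split from the stream; roughly as follows, assuming the splits are HCatSplit instances with the usual Writable no-arg constructor and readFields, as the write side above implies:

  @Override
  public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
    conf = new Configuration();
    conf.readFields(in);
    int numSplits = in.readInt();
    splits = new ArrayList<InputSplit>(numSplits);
    for (int i = 0; i < numSplits; i++) {
      // Writable contract: default-construct, then populate from the stream.
      HCatSplit split = new HCatSplit();
      split.readFields(in);
      splits.add(split);
    }
  }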
Use of org.apache.hadoop.mapreduce.InputSplit in project crunch by cloudera.
The class CrunchInputFormat, method getSplits.
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
  List<InputSplit> splits = Lists.newArrayList();
  Configuration conf = job.getConfiguration();
  Map<InputBundle, Map<Integer, List<Path>>> formatNodeMap = CrunchInputs.getFormatNodeMap(job);
  // First, build a map of InputFormats to Paths
  for (Map.Entry<InputBundle, Map<Integer, List<Path>>> entry : formatNodeMap.entrySet()) {
    InputBundle inputBundle = entry.getKey();
    Job jobCopy = new Job(conf);
    InputFormat<?, ?> format = (InputFormat<?, ?>) ReflectionUtils.newInstance(inputBundle.getInputFormatClass(), jobCopy.getConfiguration());
    for (Map.Entry<Integer, List<Path>> nodeEntry : entry.getValue().entrySet()) {
      Integer nodeIndex = nodeEntry.getKey();
      List<Path> paths = nodeEntry.getValue();
      FileInputFormat.setInputPaths(jobCopy, paths.toArray(new Path[paths.size()]));
      // Get splits for each input path and tag each one with its InputFormat
      // and node index by wrapping it in a CrunchInputSplit.
      List<InputSplit> pathSplits = format.getSplits(jobCopy);
      for (InputSplit pathSplit : pathSplits) {
        splits.add(new CrunchInputSplit(pathSplit, inputBundle.getInputFormatClass(), inputBundle.getExtraConfiguration(), nodeIndex, jobCopy.getConfiguration()));
      }
    }
  }
  return splits;
}
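The wrapping only pays off when createRecordReader later unwraps the CrunchInputSplit and delegates to the original format. A generic sketch of that delegating pattern (the accessor names on the wrapper split are illustrative, not Crunch's actual API):

  // Illustrative delegating reader: unwrap the composite split, re-instantiate
  // the tagged InputFormat, and return its RecordReader for the inner split.
  @Override
  public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    CrunchInputSplit crunchSplit = (CrunchInputSplit) split;
    InputFormat<K, V> delegate = (InputFormat<K, V>) ReflectionUtils.newInstance(
        crunchSplit.getInputFormatClass(), crunchSplit.getConf());
    return delegate.createRecordReader(crunchSplit.getInputSplit(), context);
  }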