Use of com.twitter.elephantbird.pig.util.LoadFuncTupleIterator in the project elephant-bird by twitter.
The class TestSequenceFileStorage, method readOutsidePig.
/**
 * Reads {@code tempFilename} through a {@link SequenceFileLoader} without running a Pig
 * script: the calls Pig would normally make on the loader (set signature, set location,
 * prepare to read) are issued by hand, and the resulting tuples are checked by
 * {@code validate}.
 *
 * <p>The record reader opened here is always closed, even when validation fails, so the
 * test does not leave the input file open.
 */
@Test
public void readOutsidePig() throws ClassCastException, ParseException, ClassNotFoundException, InstantiationException, IllegalAccessException, IOException, InterruptedException {
  // simulate Pig front-end runtime
  final SequenceFileLoader<IntWritable, Text> storage = new SequenceFileLoader<IntWritable, Text>("-c " + IntWritableConverter.class.getName(), "-c " + TextConverter.class.getName());
  Job job = new Job();
  storage.setUDFContextSignature("12345");
  storage.setLocation(tempFilename, job);

  // simulate Pig back-end runtime
  RecordReader<DataInputBuffer, DataInputBuffer> reader = new RawSequenceFileRecordReader();
  // a single split spanning the whole temp file
  FileSplit fileSplit = new FileSplit(new Path(tempFilename), 0, new File(tempFilename).length(), new String[] { "localhost" });
  TaskAttemptContext context = HadoopCompat.newTaskAttemptContext(HadoopCompat.getConfiguration(job), new TaskAttemptID());
  reader.initialize(fileSplit, context);
  // plain try/finally (not try-with-resources): only RecordReader.close() is relied on,
  // so this compiles whether or not the Hadoop version in use makes RecordReader Closeable
  try {
    InputSplit[] wrappedSplits = new InputSplit[] { fileSplit };
    int inputIndex = 0;
    List<OperatorKey> targetOps = Arrays.asList(new OperatorKey("54321", 0));
    int splitIndex = 0;
    PigSplit split = new PigSplit(wrappedSplits, inputIndex, targetOps, splitIndex);
    split.setConf(HadoopCompat.getConfiguration(job));
    storage.prepareToRead(reader, split);

    // read tuples and validate
    validate(new LoadFuncTupleIterator(storage));
  } finally {
    // release the reader opened by initialize(); the original never closed it
    reader.close();
  }
}
Aggregations