Use of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit in project elephant-bird by Twitter.
From the class AbstractTestWritableConverter, method readOutsidePig:
@Test
public void readOutsidePig() throws ClassCastException, ParseException, ClassNotFoundException, InstantiationException, IllegalAccessException, IOException, InterruptedException {
    // simulate Pig front-end runtime
    final SequenceFileLoader<IntWritable, Text> loader = new SequenceFileLoader<IntWritable, Text>(
        String.format("-c %s", IntWritableConverter.class.getName()),
        String.format("-c %s %s", writableConverterClass.getName(), writableConverterArguments));
    Job job = new Job();
    loader.setUDFContextSignature("12345");
    loader.setLocation(tempFilename, job);
    // simulate Pig back-end runtime
    final RecordReader<DataInputBuffer, DataInputBuffer> reader = new RawSequenceFileRecordReader();
    final FileSplit fileSplit = new FileSplit(new Path(tempFilename), 0, new File(tempFilename).length(), new String[] { "localhost" });
    final TaskAttemptContext context = HadoopCompat.newTaskAttemptContext(HadoopCompat.getConfiguration(job), new TaskAttemptID());
    reader.initialize(fileSplit, context);
    final InputSplit[] wrappedSplits = new InputSplit[] { fileSplit };
    final int inputIndex = 0;
    final List<OperatorKey> targetOps = Arrays.asList(new OperatorKey("54321", 0));
    final int splitIndex = 0;
    final PigSplit split = new PigSplit(wrappedSplits, inputIndex, targetOps, splitIndex);
    split.setConf(HadoopCompat.getConfiguration(job));
    loader.prepareToRead(reader, split);
    // read tuples and validate
    validate(new LoadFuncTupleIterator(loader));
}
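The validate helper and the SequenceFile behind tempFilename are set up elsewhere in the test class; the essential contract is that LoadFuncTupleIterator drives the prepared loader's getNext() until it returns null. A minimal sketch of that consumption loop, using only Pig's public LoadFunc contract (the drainLoader helper is illustrative, not part of elephant-bird):

import java.io.IOException;
import org.apache.pig.LoadFunc;
import org.apache.pig.data.Tuple;

// Drain a prepared LoadFunc the way LoadFuncTupleIterator does: getNext()
// returns one Tuple per record and null once the input is exhausted.
static void drainLoader(LoadFunc loader) throws IOException {
    Tuple tuple;
    while ((tuple = loader.getNext()) != null) {
        // a real test would assert on each tuple's fields here
        System.out.println(tuple);
    }
}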
Use of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit in project elephant-bird by Twitter.
From the class TestSequenceFileStorage, method readOutsidePig:
@Test
public void readOutsidePig() throws ClassCastException, ParseException, ClassNotFoundException, InstantiationException, IllegalAccessException, IOException, InterruptedException {
    // simulate Pig front-end runtime
    final SequenceFileLoader<IntWritable, Text> storage = new SequenceFileLoader<IntWritable, Text>(
        "-c " + IntWritableConverter.class.getName(),
        "-c " + TextConverter.class.getName());
    Job job = new Job();
    storage.setUDFContextSignature("12345");
    storage.setLocation(tempFilename, job);
    // simulate Pig back-end runtime
    RecordReader<DataInputBuffer, DataInputBuffer> reader = new RawSequenceFileRecordReader();
    FileSplit fileSplit = new FileSplit(new Path(tempFilename), 0, new File(tempFilename).length(), new String[] { "localhost" });
    TaskAttemptContext context = HadoopCompat.newTaskAttemptContext(HadoopCompat.getConfiguration(job), new TaskAttemptID());
    reader.initialize(fileSplit, context);
    InputSplit[] wrappedSplits = new InputSplit[] { fileSplit };
    int inputIndex = 0;
    List<OperatorKey> targetOps = Arrays.asList(new OperatorKey("54321", 0));
    int splitIndex = 0;
    PigSplit split = new PigSplit(wrappedSplits, inputIndex, targetOps, splitIndex);
    split.setConf(HadoopCompat.getConfiguration(job));
    storage.prepareToRead(reader, split);
    // read tuples and validate
    validate(new LoadFuncTupleIterator(storage));
}
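Neither test shows how the fixture file is produced. A hypothetical sketch of writing a SequenceFile that these loaders could read back, assuming IntWritable keys and Text values as the converters imply (the writeFixture helper and its contents are illustrative, not elephant-bird code):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

// Write a small IntWritable/Text SequenceFile like the one the tests read back.
static void writeFixture(String tempFilename) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    SequenceFile.Writer writer =
        SequenceFile.createWriter(fs, conf, new Path(tempFilename), IntWritable.class, Text.class);
    try {
        for (int i = 0; i < 10; i++) {
            writer.append(new IntWritable(i), new Text("value " + i));
        }
    } finally {
        writer.close();
    }
}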
Use of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit in project elephant-bird by Twitter.
From the class TestLocationAsTuple, method testSimpleLoad:
@Test
public void testSimpleLoad() throws IOException {
    Configuration conf = new Configuration();
    Job job = EasyMock.createMock(Job.class);
    EasyMock.expect(HadoopCompat.getConfiguration(job)).andStubReturn(conf);
    EasyMock.replay(job);
    LoadFunc loader = new LocationAsTuple();
    loader.setUDFContextSignature("foo");
    loader.setLocation("a\tb", job);
    RecordReader reader = EasyMock.createMock(RecordReader.class);
    PigSplit split = EasyMock.createMock(PigSplit.class);
    EasyMock.expect(split.getConf()).andStubReturn(conf);
    // switch the mock from record to replay state so the stubbed getConf() takes effect
    EasyMock.replay(split);
    loader.prepareToRead(reader, split);
    Tuple next = loader.getNext();
    assertEquals("a", next.get(0));
    assertEquals("b", next.get(1));
}
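The assertions imply that LocationAsTuple treats the location string itself as the record, splitting it on a delimiter (tab by default, per this test). A sketch of that splitting behavior under this assumption; TupleFactory is Pig's real tuple API, but locationToTuple is an illustrative helper, not the class's actual code:

import java.util.StringTokenizer;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

// "a\tb" with the default tab delimiter becomes the two-field tuple (a, b).
static Tuple locationToTuple(String location, String delimiter) throws ExecException {
    StringTokenizer tokens = new StringTokenizer(location, delimiter);
    Tuple tuple = TupleFactory.getInstance().newTuple(tokens.countTokens());
    for (int i = 0; tokens.hasMoreTokens(); i++) {
        tuple.set(i, tokens.nextToken());
    }
    return tuple;
}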
Use of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit in project elephant-bird by Twitter.
From the class TestLocationAsTuple, method testTokenizedLoad:
@Test
public void testTokenizedLoad() throws IOException {
    Configuration conf = new Configuration();
    Job job = EasyMock.createMock(Job.class);
    EasyMock.expect(HadoopCompat.getConfiguration(job)).andStubReturn(conf);
    EasyMock.replay(job);
    LoadFunc loader = new LocationAsTuple(",");
    loader.setUDFContextSignature("foo");
    loader.setLocation("a,b\tc", job);
    RecordReader reader = EasyMock.createMock(RecordReader.class);
    PigSplit split = EasyMock.createMock(PigSplit.class);
    EasyMock.expect(split.getConf()).andStubReturn(conf);
    // switch the mock from record to replay state so the stubbed getConf() takes effect
    EasyMock.replay(split);
    loader.prepareToRead(reader, split);
    Tuple next = loader.getNext();
    assertEquals("a", next.get(0));
    assertEquals("b\tc", next.get(1));
}
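The comma case follows directly: only the delimiter passed to the constructor splits the location, so the embedded tab survives inside the second field. Using the hypothetical locationToTuple sketch above:

Tuple t = locationToTuple("a,b\tc", ",");
// t.get(0) -> "a"
// t.get(1) -> "b\tc"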