Use of org.apache.pig.LoadFunc in project parquet-mr by apache.
The class PerfTest2, method load:
static void load(String out, int colsToLoad, StringBuilder results) throws Exception {
  StringBuilder schemaString = new StringBuilder("a0: chararray");
  for (int i = 1; i < colsToLoad; i++) {
    schemaString.append(", a" + i + ": chararray");
  }
  long t0 = System.currentTimeMillis();
  Job job = new Job(conf);
  int loadjobId = jobid++;
  LoadFunc loadFunc = new ParquetLoader(schemaString.toString());
  loadFunc.setUDFContextSignature("sigLoader" + loadjobId);
  String absPath = loadFunc.relativeToAbsolutePath(out, new Path(new File(".").getAbsoluteFile().toURI()));
  loadFunc.setLocation(absPath, job);
  @SuppressWarnings("unchecked") // that's how the base class is defined
  InputFormat<Void, Tuple> inputFormat = loadFunc.getInputFormat();
  JobContext jobContext = ContextUtil.newJobContext(ContextUtil.getConfiguration(job), new JobID("jt", loadjobId));
  List<InputSplit> splits = inputFormat.getSplits(jobContext);
  int i = 0;
  int taskid = 0;
  for (InputSplit split : splits) {
    TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(ContextUtil.getConfiguration(job), new TaskAttemptID("jt", loadjobId, true, taskid++, 0));
    RecordReader<Void, Tuple> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
    loadFunc.prepareToRead(recordReader, null);
    recordReader.initialize(split, taskAttemptContext);
    Tuple t;
    while ((t = loadFunc.getNext()) != null) {
      if (DEBUG) System.out.println(t);
      ++i;
    }
  }
  assertEquals(ROW_COUNT, i);
  long t1 = System.currentTimeMillis();
  results.append((t1 - t0) + " ms to read " + colsToLoad + " columns\n");
}
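The benchmark drives the whole LoadFunc lifecycle by hand instead of running a Pig script: setUDFContextSignature, setLocation, getInputFormat, getSplits, createRecordReader, prepareToRead, and finally a getNext loop. Because ParquetLoader is handed a Pig schema with only colsToLoad columns, only that projection needs to be materialized, which is what the timing is meant to capture. Below is a minimal, hedged sketch of the same pattern as a stand-alone program; the path target/perf-out, the two-column schema, and the job/task IDs are illustrative assumptions, and the import packages may differ between parquet-mr versions.

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.parquet.hadoop.util.ContextUtil;
import org.apache.parquet.pig.ParquetLoader;
import org.apache.pig.LoadFunc;
import org.apache.pig.data.Tuple;

public class ProjectedParquetRead {
  public static void main(String[] args) throws Exception {
    Job job = new Job(new Configuration());
    // Ask ParquetLoader for a two-column projection of the stored schema.
    LoadFunc loadFunc = new ParquetLoader("a0: chararray, a1: chararray");
    loadFunc.setUDFContextSignature("projectedRead");
    loadFunc.setLocation("target/perf-out", job);
    @SuppressWarnings("unchecked")
    InputFormat<Void, Tuple> inputFormat = loadFunc.getInputFormat();
    List<InputSplit> splits = inputFormat.getSplits(
        ContextUtil.newJobContext(ContextUtil.getConfiguration(job), new JobID("local", 1)));
    int rows = 0;
    int taskId = 0;
    for (InputSplit split : splits) {
      TaskAttemptContext ctx = ContextUtil.newTaskAttemptContext(
          ContextUtil.getConfiguration(job), new TaskAttemptID("local", 1, true, taskId++, 0));
      RecordReader<Void, Tuple> reader = inputFormat.createRecordReader(split, ctx);
      loadFunc.prepareToRead(reader, null);
      reader.initialize(split, ctx);
      // getNext() returns null once the current reader is exhausted.
      for (Tuple t = loadFunc.getNext(); t != null; t = loadFunc.getNext()) {
        rows++;
      }
    }
    System.out.println(rows + " rows read with a two-column projection");
  }
}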
Use of org.apache.pig.LoadFunc in project elephant-bird by twitter.
The class TestLocationAsTuple, method testTokenizedLoad:
@Test
public void testTokenizedLoad() throws IOException {
  Configuration conf = new Configuration();
  Job job = EasyMock.createMock(Job.class);
  EasyMock.expect(HadoopCompat.getConfiguration(job)).andStubReturn(conf);
  EasyMock.replay(job);
  LoadFunc loader = new LocationAsTuple(",");
  loader.setUDFContextSignature("foo");
  loader.setLocation("a,b\tc", job);
  RecordReader reader = EasyMock.createMock(RecordReader.class);
  PigSplit split = EasyMock.createMock(PigSplit.class);
  EasyMock.expect(split.getConf()).andStubReturn(conf);
  loader.prepareToRead(reader, split);
  Tuple next = loader.getNext();
  assertEquals("a", next.get(0));
  assertEquals("b\tc", next.get(1));
}
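Both elephant-bird tests set up the same EasyMock scaffolding: a mocked Job whose Configuration is returned through HadoopCompat.getConfiguration, and a mocked PigSplit that hands back the same Configuration from getConf. A hedged sketch of pulling that setup into shared helpers inside the test class (the method names are illustrative and not part of elephant-bird) could look like this:

// Illustrative helpers that would live inside TestLocationAsTuple; they use
// only classes the tests above already reference (EasyMock, HadoopCompat,
// Job, Configuration, PigSplit).
private Job mockJob(Configuration conf) {
  Job job = EasyMock.createMock(Job.class);
  EasyMock.expect(HadoopCompat.getConfiguration(job)).andStubReturn(conf);
  EasyMock.replay(job);
  return job;
}

private PigSplit mockSplit(Configuration conf) {
  PigSplit split = EasyMock.createMock(PigSplit.class);
  EasyMock.expect(split.getConf()).andStubReturn(conf);
  return split;
}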
Use of org.apache.pig.LoadFunc in project elephant-bird by twitter.
The class TestLocationAsTuple, method testSimpleLoad:
@Test
public void testSimpleLoad() throws IOException {
  Configuration conf = new Configuration();
  Job job = EasyMock.createMock(Job.class);
  EasyMock.expect(HadoopCompat.getConfiguration(job)).andStubReturn(conf);
  EasyMock.replay(job);
  LoadFunc loader = new LocationAsTuple();
  loader.setUDFContextSignature("foo");
  loader.setLocation("a\tb", job);
  RecordReader reader = EasyMock.createMock(RecordReader.class);
  PigSplit split = EasyMock.createMock(PigSplit.class);
  EasyMock.expect(split.getConf()).andStubReturn(conf);
  loader.prepareToRead(reader, split);
  Tuple next = loader.getNext();
  assertEquals("a", next.get(0));
  assertEquals("b", next.get(1));
}
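Taken together, the two tests pin down LocationAsTuple's contract: the location string passed to setLocation is tokenized on the delimiter given to the constructor (tab by default) and returned once as a single Tuple from getNext. The sketch below is a simplified loader with that behavior, inferred from the tests rather than copied from the elephant-bird source; the class name is illustrative, and the InputFormat side, which the tests never exercise, is left unimplemented.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.pig.LoadFunc;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

// Simplified illustration of a loader that satisfies the two tests above.
public class LocationAsTupleSketch extends LoadFunc {
  private final String delimiter;
  private String location;
  private boolean emitted;

  public LocationAsTupleSketch() {
    this("\t"); // testSimpleLoad shows tab as the default delimiter
  }

  public LocationAsTupleSketch(String delimiter) {
    this.delimiter = delimiter;
  }

  @Override
  public void setLocation(String location, Job job) throws IOException {
    // Keep the raw location string; here it is the data, not a path to resolve.
    this.location = location;
  }

  @Override
  public InputFormat getInputFormat() throws IOException {
    // A real loader must return an InputFormat that yields a single split so
    // getNext() runs once per task; that part is omitted from this sketch.
    throw new UnsupportedOperationException("not needed by the tests above");
  }

  @Override
  public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
    emitted = false;
  }

  @Override
  public Tuple getNext() throws IOException {
    if (emitted) {
      return null;
    }
    emitted = true;
    List<String> tokens = new ArrayList<String>();
    StringTokenizer tokenizer = new StringTokenizer(location, delimiter);
    while (tokenizer.hasMoreTokens()) {
      tokens.add(tokenizer.nextToken());
    }
    return TupleFactory.getInstance().newTuple(tokens);
  }
}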