Example use of cascading.scheme.hadoop.TextLine in the apache/parquet-mr project.
From the class TestParquetTupleScheme, method testFieldProjection.
@Test
public void testFieldProjection() throws Exception {
  // Verifies that ParquetTupleScheme can project a single column ("last_name")
  // out of a multi-column Parquet file and that the flow emits exactly that column.
  createFileForRead();
  Path path = new Path(txtOutputPath);
  final FileSystem fs = path.getFileSystem(new Configuration());
  // Remove stale output from a previous run so the flow can write fresh results.
  if (fs.exists(path)) {
    fs.delete(path, true);
  }
  // Source reads only the projected "last_name" field from the Parquet input.
  Scheme sourceScheme = new ParquetTupleScheme(new Fields("last_name"));
  Tap source = new Hfs(sourceScheme, parquetInputPath);
  Scheme sinkScheme = new TextLine(new Fields("last_name"));
  Tap sink = new Hfs(sinkScheme, txtOutputPath);
  Pipe assembly = new Pipe("namecp");
  assembly = new Each(assembly, new ProjectedTupleFunction());
  Flow flow = new HadoopFlowConnector().connect("namecp", source, sink, assembly);
  flow.complete();
  // Read with an explicit charset: the no-charset readFileToString overload is
  // deprecated and depends on the platform default, which varies by environment.
  String result = FileUtils.readFileToString(new File(txtOutputPath + "/part-00000"), "UTF-8");
  assertEquals("Practice\nHope\nHorse\n", result);
}
Example use of cascading.scheme.hadoop.TextLine in the apache/parquet-mr project.
From the class TestParquetTupleScheme, method testReadWrite.
/**
 * Reads a two-column ("first_name", "last_name") Parquet file from {@code inputPath},
 * unpacks each tuple via {@link UnpackTupleFunction}, and writes the result as
 * tab-separated text, asserting the exact expected file contents.
 *
 * @param inputPath path to the Parquet file to read
 * @throws Exception if the Hadoop flow or file I/O fails
 */
public void testReadWrite(String inputPath) throws Exception {
  createFileForRead();
  Path path = new Path(txtOutputPath);
  final FileSystem fs = path.getFileSystem(new Configuration());
  // Remove stale output from a previous run so the flow can write fresh results.
  if (fs.exists(path)) {
    fs.delete(path, true);
  }
  Scheme sourceScheme = new ParquetTupleScheme(new Fields("first_name", "last_name"));
  Tap source = new Hfs(sourceScheme, inputPath);
  Scheme sinkScheme = new TextLine(new Fields("first", "last"));
  Tap sink = new Hfs(sinkScheme, txtOutputPath);
  Pipe assembly = new Pipe("namecp");
  assembly = new Each(assembly, new UnpackTupleFunction());
  Flow flow = new HadoopFlowConnector().connect("namecp", source, sink, assembly);
  flow.complete();
  // Read with an explicit charset: the no-charset readFileToString overload is
  // deprecated and depends on the platform default, which varies by environment.
  String result = FileUtils.readFileToString(new File(txtOutputPath + "/part-00000"), "UTF-8");
  assertEquals("Alice\tPractice\nBob\tHope\nCharlie\tHorse\n", result);
}
Aggregations