Example usage of cascading.scheme.Scheme from the Apache parquet-mr project — class TestParquetTBaseScheme, method testWrite.
@Test
public void testWrite() throws Exception {
  // Runs a Cascading flow that packs text input into Name Thrift records and
  // writes them as Parquet, then verifies the output and its summary files exist.
  Path path = new Path(parquetOutputPath);
  JobConf jobConf = new JobConf();
  final FileSystem fs = path.getFileSystem(jobConf);
  // Start from a clean slate: remove output left over from a previous run.
  if (fs.exists(path)) {
    fs.delete(path, true);
  }
  Scheme sourceScheme = new TextLine(new Fields("first", "last"));
  Tap source = new Hfs(sourceScheme, txtInputPath);
  Scheme sinkScheme = new ParquetTBaseScheme(Name.class);
  Tap sink = new Hfs(sinkScheme, parquetOutputPath);
  Pipe assembly = new Pipe("namecp");
  // PackThriftFunction converts each (first, last) text tuple into a Name record.
  assembly = new Each(assembly, new PackThriftFunction());
  HadoopFlowConnector hadoopFlowConnector = new HadoopFlowConnector();
  Flow flow = hadoopFlowConnector.connect("namecp", source, sink, assembly);
  flow.complete();
  // Reuse the already-constructed output path instead of rebuilding it.
  assertTrue(fs.exists(path));
  // The Parquet output committer should also have written the summary files.
  assertTrue(fs.exists(new Path(parquetOutputPath + "/_metadata")));
  assertTrue(fs.exists(new Path(parquetOutputPath + "/_common_metadata")));
}
Example usage of cascading.scheme.Scheme from the Apache parquet-mr project — class TestParquetTBaseScheme, method doRead.
private void doRead(Scheme sourceScheme) throws Exception {
  // Reads Parquet input through the supplied scheme, unpacks each Thrift
  // record into text fields, and checks the tab-separated output.
  createFileForRead();

  final Path outputPath = new Path(txtOutputPath);
  final FileSystem fileSystem = outputPath.getFileSystem(new Configuration());
  // Clear any stale output from an earlier run.
  if (fileSystem.exists(outputPath)) {
    fileSystem.delete(outputPath, true);
  }

  Tap source = new Hfs(sourceScheme, parquetInputPath);
  Tap sink = new Hfs(new TextLine(new Fields("first", "last")), txtOutputPath);

  // UnpackThriftFunction turns each Thrift record back into (first, last) fields.
  Pipe pipe = new Each(new Pipe("namecp"), new UnpackThriftFunction());
  new HadoopFlowConnector().connect("namecp", source, sink, pipe).complete();

  String written = FileUtils.readFileToString(new File(txtOutputPath + "/part-00000"));
  assertEquals("Alice\tPractice\nBob\tHope\nCharlie\tHorse\n", written);
}
Example usage of cascading.scheme.Scheme from the Apache parquet-mr project — class ParquetScroogeSchemeTest, method verifyScroogeRead.
public <T> void verifyScroogeRead(List<TBase> recordsToWrite, Class<T> readClass, String expectedStr, String projectionFilter) throws Exception {
  // Round-trip check: write the given Thrift records as Parquet, read them
  // back through the Scrooge scheme (with an optional projection), and
  // compare the resulting text output against the expectation.
  Configuration conf = new Configuration();
  deleteIfExist(PARQUET_PATH);
  deleteIfExist(TXT_OUTPUT_PATH);
  writeParquetFile(recordsToWrite, conf, new Path(PARQUET_PATH));

  Scheme readScheme = new ParquetScroogeScheme(
      new Config().withRecordClass(readClass).withProjectionString(projectionFilter));
  Tap in = new Hfs(readScheme, PARQUET_PATH);
  Tap out = new Hfs(new TextLine(new Fields("first", "last")), TXT_OUTPUT_PATH);

  // ObjectToStringFunction renders each record as a line of text.
  Pipe pipe = new Each(new Pipe("namecp"), new ObjectToStringFunction());
  new HadoopFlowConnector().connect("namecp", in, out, pipe).complete();

  String actual = FileUtils.readFileToString(new File(TXT_OUTPUT_PATH + "/part-00000"));
  assertEquals(expectedStr, actual);
}
Example usage of cascading.scheme.Scheme from the Apache parquet-mr project — class ParquetScroogeSchemeTest, method doWrite.
private void doWrite() throws Exception {
  // Packs the (first, last) text input into Name Scrooge records and writes
  // them to the Parquet output location.
  final Path outputPath = new Path(parquetOutputPath);
  final FileSystem fileSystem = outputPath.getFileSystem(new Configuration());
  // Remove leftovers from any previous run before writing.
  if (fileSystem.exists(outputPath)) {
    fileSystem.delete(outputPath, true);
  }

  Tap source = new Hfs(new TextLine(new Fields("first", "last")), txtInputPath);
  Tap sink = new Hfs(new ParquetScroogeScheme<Name>(Name.class), parquetOutputPath);

  // PackThriftFunction builds a Name record from each input tuple.
  Pipe pipe = new Each(new Pipe("namecp"), new PackThriftFunction());
  new HadoopFlowConnector().connect("namecp", source, sink, pipe).complete();
}
Example usage of cascading.scheme.Scheme from the Apache parquet-mr project — class ParquetScroogeSchemeTest, method doRead.
private void doRead() throws Exception {
  // Reads the previously written Parquet Name records back through the
  // Scrooge scheme, unpacks them to text, and checks the output contents.
  final Path outputPath = new Path(txtOutputPath);
  final FileSystem fileSystem = outputPath.getFileSystem(new Configuration());
  // Clear any stale text output before running the flow.
  if (fileSystem.exists(outputPath)) {
    fileSystem.delete(outputPath, true);
  }

  Tap source = new Hfs(new ParquetScroogeScheme<Name>(Name.class), parquetOutputPath);
  Tap sink = new Hfs(new TextLine(new Fields("first", "last")), txtOutputPath);

  // UnpackThriftFunction expands each record into its constituent fields.
  Pipe pipe = new Each(new Pipe("namecp"), new UnpackThriftFunction());
  new HadoopFlowConnector().connect("namecp", source, sink, pipe).complete();

  String written = FileUtils.readFileToString(new File(txtOutputPath + "/part-00000"));
  assertEquals("0\tAlice\tPractice\n15\tBob\tHope\n24\tCharlie\tHorse\n", written);
}
Aggregations