Use of org.apache.parquet.cascading.ParquetValueScheme.Config in the Apache parquet-mr project.
From the class ParquetScroogeSchemeTest, method verifyScroogeRead.
/**
 * Round-trips records through a Parquet file and a Cascading flow, then checks the text output.
 *
 * <p>Steps: removes any previous files at {@code PARQUET_PATH} and {@code TXT_OUTPUT_PATH},
 * writes {@code recordsToWrite} to a Parquet file, reads that file back through a
 * {@link ParquetScroogeScheme} configured with {@code readClass} and {@code projectionFilter},
 * pipes every tuple through {@link ObjectToStringFunction} into a text sink, and finally
 * compares the content of the sink's {@code part-00000} file with {@code expectedStr}.
 *
 * @param recordsToWrite records handed to {@code writeParquetFile} to produce the input file
 * @param readClass record class passed to the source scheme via {@code withRecordClass}
 * @param expectedStr exact expected content of the {@code part-00000} output file
 * @param projectionFilter projection string passed to the source scheme via
 *     {@code withProjectionString}
 * @param <T> type of the records read back
 * @throws Exception if writing, running the flow, or reading the output fails
 */
public <T> void verifyScroogeRead(List<TBase> recordsToWrite, Class<T> readClass, String expectedStr, String projectionFilter) throws Exception {
  Configuration conf = new Configuration();

  // Start from a clean slate so leftovers from an earlier run cannot affect this one.
  deleteIfExist(PARQUET_PATH);
  deleteIfExist(TXT_OUTPUT_PATH);

  final Path parquetFile = new Path(PARQUET_PATH);
  writeParquetFile(recordsToWrite, conf, parquetFile);

  // Source: the Parquet file just written, read as readClass with the requested projection.
  Scheme sourceScheme = new ParquetScroogeScheme(
      new Config().withRecordClass(readClass).withProjectionString(projectionFilter));
  Tap source = new Hfs(sourceScheme, PARQUET_PATH);

  // Sink: plain text lines under TXT_OUTPUT_PATH.
  Scheme sinkScheme = new TextLine(new Fields("first", "last"));
  Tap sink = new Hfs(sinkScheme, TXT_OUTPUT_PATH);

  Pipe assembly = new Pipe("namecp");
  assembly = new Each(assembly, new ObjectToStringFunction());

  Flow flow = new HadoopFlowConnector().connect("namecp", source, sink, assembly);
  flow.complete();

  // Decode with an explicit charset instead of the JVM's platform default, so the comparison
  // does not depend on the host's file.encoding. UTF-8 is chosen on the understanding that
  // Cascading's TextLine writes UTF-8 by default -- NOTE(review): confirm against the
  // Cascading version in use.
  String result = FileUtils.readFileToString(new File(TXT_OUTPUT_PATH + "/part-00000"), "UTF-8");
  assertEquals(expectedStr, result);
}
Aggregations