Use of org.apache.parquet.hadoop.metadata.ParquetMetadata in project parquet-mr by Apache.
The class DumpCommand, method execute.
/**
 * Reads the footer of the file named by the first positional argument and
 * hands it to {@code dump} together with the display switches taken from
 * the command line.
 *
 * <p>The {@code m}, {@code d} and {@code n} options each turn off a switch
 * that is otherwise on; the {@code c} option supplies the set of column
 * names forwarded to {@code dump} ({@code null} when the option is absent).
 *
 * @param options the parsed command line
 * @throws Exception if the parent command, the footer read or the dump fails
 */
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);

  final String[] positional = options.getArgs();
  final String fileName = positional[0];

  final Configuration hadoopConf = new Configuration();
  final Path inpath = new Path(fileName);
  final ParquetMetadata footer = ParquetFileReader.readFooter(hadoopConf, inpath, NO_FILTER);
  final MessageType fileSchema = footer.getFileMetaData().getSchema();

  // Each of these options negates a switch that defaults to true.
  final boolean withMetadata = !options.hasOption('m');
  final boolean withData = !options.hasOption('d');
  final boolean cropped = !options.hasOption('n');

  // null means no column filter was given on the command line.
  final Set<String> selectedColumns = options.hasOption('c')
      ? new HashSet<String>(Arrays.asList(options.getOptionValues('c')))
      : null;

  final PrettyPrintWriter printer = prettyPrintWriter(cropped);
  dump(printer, footer, fileSchema, inpath, withMetadata, withData, selectedColumns);
}
Use of org.apache.parquet.hadoop.metadata.ParquetMetadata in project parquet-mr by Apache.
The class CatCommand, method execute.
/**
 * Prints every record of the file named by the first positional argument
 * to {@code Main.out}, one record per line.
 *
 * <p>When the {@code j} option is present each record is written through a
 * JSON formatter built from the file schema; otherwise the record's own
 * {@code prettyPrint} is used.
 *
 * @param options the parsed command line
 * @throws Exception if the parent command fails, or if opening or reading
 *         the file fails; a failure while closing the reader is ignored
 */
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);

  String[] args = options.getArgs();
  // One Path instance serves both the record reader and the footer read.
  Path inputPath = new Path(args[0]);
  // The output mode cannot change while streaming, so look it up once
  // instead of once per record.
  boolean asJson = options.hasOption('j');

  ParquetReader<SimpleRecord> reader = null;
  try {
    PrintWriter writer = new PrintWriter(Main.out, true);
    reader = ParquetReader.builder(new SimpleReadSupport(), inputPath).build();
    ParquetMetadata metadata = ParquetFileReader.readFooter(new Configuration(), inputPath);
    JsonRecordFormatter.JsonGroupFormatter formatter =
        JsonRecordFormatter.fromSchema(metadata.getFileMetaData().getSchema());

    for (SimpleRecord value = reader.read(); value != null; value = reader.read()) {
      if (asJson) {
        writer.write(formatter.formatRecord(value));
      } else {
        value.prettyPrint(writer);
      }
      writer.println();
    }
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (Exception ignored) {
        // Best-effort close: all output has already been produced (or a
        // more relevant exception is already propagating), so a failure
        // here must not replace that outcome.
      }
    }
  }
}
Use of org.apache.parquet.hadoop.metadata.ParquetMetadata in project parquet-mr by Apache.
The class ParquetFileWriter, method end.
/**
 * Ends a file once all blocks have been written: builds the footer from the
 * schema, the given extra metadata and the accumulated blocks, serializes
 * it to the output stream and closes that stream.
 *
 * <p>The stream is closed even when writing the footer fails, so a failed
 * call does not leave it open.
 *
 * @param extraMetaData the extra meta data to write in the footer
 * @throws IOException if the footer cannot be serialized, or if closing the
 *         stream fails after a successful serialization
 */
public void end(Map<String, String> extraMetaData) throws IOException {
  state = state.end();
  LOG.debug("{}: end", out.getPos());
  this.footer = new ParquetMetadata(new FileMetaData(schema, extraMetaData, Version.FULL_VERSION), blocks);

  boolean footerWritten = false;
  try {
    serializeFooter(footer, out);
    footerWritten = true;
  } finally {
    if (footerWritten) {
      // Normal path: a close failure is reported to the caller as before.
      out.close();
    } else {
      // Failure path: still release the stream, but let the serialization
      // failure remain the exception the caller sees.
      try {
        out.close();
      } catch (IOException ignored) {
        // intentionally ignored; the original failure is already propagating
      }
    }
  }
}
Use of org.apache.parquet.hadoop.metadata.ParquetMetadata in project parquet-mr by Apache.
The class ParquetFileWriter, method writeMetadataFile.
/**
 * Merges the given footers and writes the result as the _common_metadata
 * file; when {@code level} is {@link JobSummaryLevel#ALL} a _metadata file
 * is written first, before the block list is cleared from the merged footer.
 *
 * @param configuration used to resolve the file system of {@code outputPath}
 * @param outputPath the location passed to the merge and to both writes, after qualification
 * @param footers the footers to merge
 * @param level must be {@link JobSummaryLevel#ALL} or {@link JobSummaryLevel#COMMON_ONLY}
 * @throws IOException if the underlying file system operations fail
 * @deprecated metadata files are not recommended and will be removed in 2.0.0
 */
@Deprecated
public static void writeMetadataFile(Configuration configuration, Path outputPath, List<Footer> footers, JobSummaryLevel level) throws IOException {
  final boolean writeFullSummary = level == JobSummaryLevel.ALL;
  final boolean supportedLevel = writeFullSummary || level == JobSummaryLevel.COMMON_ONLY;
  Preconditions.checkArgument(supportedLevel, "Unsupported level: " + level);

  final FileSystem fileSystem = outputPath.getFileSystem(configuration);
  final Path qualifiedPath = outputPath.makeQualified(fileSystem);
  final ParquetMetadata merged = mergeFooters(qualifiedPath, footers);

  if (writeFullSummary) {
    writeMetadataFile(qualifiedPath, merged, fileSystem, PARQUET_METADATA_FILE);
  }

  // The common file is written from the same merged footer with its blocks removed.
  merged.getBlocks().clear();
  writeMetadataFile(qualifiedPath, merged, fileSystem, PARQUET_COMMON_METADATA_FILE);
}
Use of org.apache.parquet.hadoop.metadata.ParquetMetadata in project parquet-mr by Apache.
The class ParquetFileWriter, method getGlobalMetaData.
/**
 * Folds the file metadata of every footer into a single
 * {@link GlobalMetaData} by calling {@code mergeInto} once per footer, in
 * list order.
 *
 * @param footers the footers whose file metadata is merged
 * @param strict forwarded unchanged to each {@code mergeInto} call
 * @return the accumulated metadata, or {@code null} when {@code footers} is empty
 */
static GlobalMetaData getGlobalMetaData(List<Footer> footers, boolean strict) {
  GlobalMetaData merged = null;
  for (Footer each : footers) {
    merged = mergeInto(each.getParquetMetadata().getFileMetaData(), merged, strict);
  }
  return merged;
}
Aggregations