Use of org.apache.parquet.hadoop.Footer in project drill by apache: class ParquetRecordReaderTest, method validateFooters.
/**
 * Asserts that the given parquet footers match the expected test layout:
 * exactly three part files, each with two row groups and the expected
 * key/value metadata ("foo" -> "bar" plus a self-referencing file-name entry).
 */
private void validateFooters(final List<Footer> metadata) {
  logger.debug(metadata.toString());
  assertEquals(3, metadata.size());
  for (final Footer currentFooter : metadata) {
    final File partFile = new File(currentFooter.getFile().toUri());
    // Every generated file should be named part-* and actually exist on disk.
    assertTrue(partFile.getName(), partFile.getName().startsWith("part"));
    assertTrue(partFile.getPath(), partFile.exists());
    final ParquetMetadata footerMetadata = currentFooter.getParquetMetadata();
    assertEquals(2, footerMetadata.getBlocks().size());
    final Map<String, String> keyValues =
        footerMetadata.getFileMetaData().getKeyValueMetaData();
    assertEquals("bar", keyValues.get("foo"));
    final String fileName = currentFooter.getFile().getName();
    assertEquals(fileName, keyValues.get(fileName));
  }
}
Use of org.apache.parquet.hadoop.Footer in project parquet-mr by apache: class RowCountCommand, method execute.
/**
 * Prints the total number of rows across all parquet files matching the input
 * glob; with {@code -d} also prints a per-file row count.
 *
 * @param options parsed command line; the first positional argument is the
 *                input path (may be a glob)
 * @throws Exception if the filesystem cannot be reached or a footer cannot be read
 */
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);
  String[] args = options.getArgs();
  String input = args[0];
  out = new PrintWriter(Main.out, true);
  inputPath = new Path(input);
  conf = new Configuration();
  inputFileStatuses = inputPath.getFileSystem(conf).globStatus(inputPath);
  long rowCount = 0;
  for (FileStatus fs : inputFileStatuses) {
    long fileRowCount = 0;
    for (Footer f : ParquetFileReader.readFooters(conf, fs, false)) {
      for (BlockMetaData b : f.getParquetMetadata().getBlocks()) {
        fileRowCount += b.getRowCount();
      }
    }
    // Accumulate the total once per file instead of redundantly once per row group.
    rowCount += fileRowCount;
    if (options.hasOption('d')) {
      out.format("%s row count: %d\n", fs.getPath().getName(), fileRowCount);
    }
  }
  out.format("Total RowCount: %d", rowCount);
  out.println();
}
Use of org.apache.parquet.hadoop.Footer in project parquet-mr by apache: class ShowMetaCommand, method execute.
/**
 * Reads the footers of the parquet file named by the first positional argument
 * and pretty-prints their metadata to stdout, one section per footer.
 */
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);
  final String target = options.getArgs()[0];
  final Configuration configuration = new Configuration();
  final Path path = new Path(target);
  final FileStatus status = path.getFileSystem(configuration).getFileStatus(path);
  final List<Footer> parquetFooters =
      ParquetFileReader.readFooters(configuration, status, false);
  // Column-aligned writer; collapses runs of whitespace so values line up.
  final PrettyPrintWriter writer = PrettyPrintWriter.stdoutPrettyPrinter()
      .withAutoColumn()
      .withWhitespaceHandler(WhiteSpaceHandler.COLLAPSE_WHITESPACE)
      .withColumnPadding(1)
      .build();
  for (final Footer footer : parquetFooters) {
    writer.format("file: %s%n", footer.getFile());
    MetadataUtils.showDetails(writer, footer.getParquetMetadata());
    writer.flushColumns();
  }
}
Use of org.apache.parquet.hadoop.Footer in project parquet-mr by apache: class SizeCommand, method execute.
/**
 * Prints the total size (compressed by default, uncompressed with {@code -u})
 * of all parquet files matching the input glob; with {@code -d} also prints
 * per-file sizes, and with {@code -p} sizes are pretty-printed instead of
 * raw byte counts.
 *
 * @param options parsed command line; the first positional argument is the
 *                input path (may be a glob)
 * @throws Exception if the filesystem cannot be reached or a footer cannot be read
 */
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);
  String[] args = options.getArgs();
  String input = args[0];
  out = new PrintWriter(Main.out, true);
  inputPath = new Path(input);
  conf = new Configuration();
  inputFileStatuses = inputPath.getFileSystem(conf).globStatus(inputPath);
  // Hoist option lookups out of the per-row-group loop.
  final boolean uncompressed = options.hasOption('u');
  final boolean detailed = options.hasOption('d');
  final boolean pretty = options.hasOption('p');
  long size = 0;
  for (FileStatus fs : inputFileStatuses) {
    long fileSize = 0;
    for (Footer f : ParquetFileReader.readFooters(conf, fs, false)) {
      for (BlockMetaData b : f.getParquetMetadata().getBlocks()) {
        fileSize += uncompressed ? b.getTotalByteSize() : b.getCompressedSize();
      }
    }
    // Accumulate the total once per file instead of redundantly once per row group.
    size += fileSize;
    if (detailed) {
      if (pretty) {
        out.format("%s: %s\n", fs.getPath().getName(), getPrettySize(fileSize));
      } else {
        out.format("%s: %d bytes\n", fs.getPath().getName(), fileSize);
      }
    }
  }
  if (pretty) {
    out.format("Total Size: %s", getPrettySize(size));
  } else {
    out.format("Total Size: %d bytes", size);
  }
  out.println();
}
Use of org.apache.parquet.hadoop.Footer in project parquet-mr by apache: class DeprecatedParquetInputFormat, method getSplits.
/**
 * Computes input splits for the old mapred API by delegating to the new-API
 * input format and wrapping each resulting split. When task-side metadata is
 * enabled, footers are not read up front and the superclass handles splitting.
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  if (isTaskSideMetaData(job)) {
    return super.getSplits(job, numSplits);
  }
  List<Footer> footers = getFooters(job);
  List<ParquetInputSplit> delegateSplits = realInputFormat.getSplits(job, footers);
  if (delegateSplits == null) {
    return null;
  }
  // Wrap each new-API split so old-API callers can consume it.
  InputSplit[] wrapped = new InputSplit[delegateSplits.size()];
  int position = 0;
  for (ParquetInputSplit delegateSplit : delegateSplits) {
    wrapped[position] = new ParquetInputSplitWrapper(delegateSplit);
    position++;
  }
  return wrapped;
}
Aggregations