Search in sources :

Example 51 with ParquetMetadata

use of org.apache.parquet.hadoop.metadata.ParquetMetadata in project parquet-mr by apache.

the class DumpCommand method execute.

/**
 * Dumps the Parquet file named by the first positional argument.
 *
 * <p>The footer is read with {@code NO_FILTER} and its schema, together with the
 * parsed flags, is handed to {@code dump}. The presence of option {@code m},
 * {@code d} or {@code n} turns the corresponding boolean off (presumably metadata
 * display, data display and output cropping, going by the original local names);
 * option {@code c} supplies the set of columns to show, otherwise {@code null} is
 * passed.
 *
 * @param options the parsed command line
 * @throws Exception if the superclass handling, the footer read or the dump fails
 */
@Override
public void execute(CommandLine options) throws Exception {
    super.execute(options);

    String inputFile = options.getArgs()[0];
    Configuration configuration = new Configuration();
    Path inputPath = new Path(inputFile);
    ParquetMetadata footer = ParquetFileReader.readFooter(configuration, inputPath, NO_FILTER);
    MessageType fileSchema = footer.getFileMetaData().getSchema();

    // Each flag, when present, switches the matching feature off.
    boolean showMetadata = !options.hasOption('m');
    boolean showData = !options.hasOption('d');
    boolean cropOutput = !options.hasOption('n');

    // null means "no column filter was given".
    Set<String> selectedColumns = options.hasOption('c')
            ? new HashSet<String>(Arrays.asList(options.getOptionValues('c')))
            : null;

    PrettyPrintWriter printer = prettyPrintWriter(cropOutput);
    dump(printer, footer, fileSchema, inputPath, showMetadata, showData, selectedColumns);
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) PrettyPrintWriter(org.apache.parquet.tools.util.PrettyPrintWriter) MessageType(org.apache.parquet.schema.MessageType)

Example 52 with ParquetMetadata

use of org.apache.parquet.hadoop.metadata.ParquetMetadata in project parquet-mr by apache.

the class CatCommand method execute.

/**
 * Prints every record of the Parquet file named by the first positional argument
 * to {@code Main.out}, one record per {@code println}.
 *
 * <p>When option {@code j} is present each record is written through a
 * {@link JsonRecordFormatter} built from the file schema; otherwise the record's
 * own {@code prettyPrint} is used. The footer is only read when the JSON formatter
 * is actually needed.
 *
 * @param options the parsed command line
 * @throws Exception if the superclass handling, opening the file or reading a record fails
 */
@Override
public void execute(CommandLine options) throws Exception {
    super.execute(options);
    String[] args = options.getArgs();
    Path inputPath = new Path(args[0]);
    // The output format cannot change while iterating, so resolve it once.
    boolean asJson = options.hasOption('j');
    ParquetReader<SimpleRecord> reader = null;
    try {
        PrintWriter writer = new PrintWriter(Main.out, true);
        reader = ParquetReader.builder(new SimpleReadSupport(), inputPath).build();
        // Reading the footer is extra I/O; skip it unless the JSON formatter needs the schema.
        JsonRecordFormatter.JsonGroupFormatter formatter = null;
        if (asJson) {
            ParquetMetadata metadata = ParquetFileReader.readFooter(new Configuration(), inputPath);
            formatter = JsonRecordFormatter.fromSchema(metadata.getFileMetaData().getSchema());
        }
        for (SimpleRecord value = reader.read(); value != null; value = reader.read()) {
            if (asJson) {
                writer.write(formatter.formatRecord(value));
            } else {
                value.prettyPrint(writer);
            }
            writer.println();
        }
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
                // Best-effort close: by this point the records were either all printed or a
                // more relevant exception is already propagating, so it must not be masked.
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) SimpleReadSupport(org.apache.parquet.tools.read.SimpleReadSupport) SimpleRecord(org.apache.parquet.tools.read.SimpleRecord) JsonRecordFormatter(org.apache.parquet.tools.json.JsonRecordFormatter) PrintWriter(java.io.PrintWriter)

Example 53 with ParquetMetadata

use of org.apache.parquet.hadoop.metadata.ParquetMetadata in project parquet-mr by apache.

the class ParquetFileWriter method end.

/**
 * Ends a file once all blocks have been written: builds the footer from the schema,
 * the given extra metadata and the accumulated blocks, serializes it, and closes
 * the output stream.
 *
 * <p>The output stream is closed even when logging the position or serializing the
 * footer fails, so a failed {@code end} does not leak the underlying stream.
 *
 * @param extraMetaData the extra meta data to write in the footer
 * @throws IOException if the footer cannot be written or the stream cannot be closed
 */
public void end(Map<String, String> extraMetaData) throws IOException {
    // Performed outside the try block so that a rejected state transition leaves the stream untouched.
    state = state.end();
    try {
        LOG.debug("{}: end", out.getPos());
        this.footer = new ParquetMetadata(new FileMetaData(schema, extraMetaData, Version.FULL_VERSION), blocks);
        serializeFooter(footer, out);
    } finally {
        out.close();
    }
}
Also used : ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) Util.writeFileMetaData(org.apache.parquet.format.Util.writeFileMetaData) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData)

Example 54 with ParquetMetadata

use of org.apache.parquet.hadoop.metadata.ParquetMetadata in project parquet-mr by apache.

the class ParquetFileWriter method writeMetadataFile.

/**
 * Merges the given footers and writes the summary file(s) under {@code outputPath}:
 * always the _common_metadata file, and additionally the _metadata file when
 * {@code level} is {@link JobSummaryLevel#ALL}.
 *
 * <p>The merged footer's block list is cleared before the common file is written,
 * so that file carries no block entries.
 *
 * @param configuration used to resolve the file system of {@code outputPath}
 * @param outputPath location the summary files are written for; qualified against its file system
 * @param footers the footers to merge
 * @param level must be {@code ALL} or {@code COMMON_ONLY}; anything else is rejected
 * @throws IOException propagated from resolving the file system, merging the footers or writing a file
 * @deprecated metadata files are not recommended and will be removed in 2.0.0
 */
@Deprecated
public static void writeMetadataFile(Configuration configuration, Path outputPath, List<Footer> footers, JobSummaryLevel level) throws IOException {
    boolean includeFullSummary = level == JobSummaryLevel.ALL;
    Preconditions.checkArgument(includeFullSummary || level == JobSummaryLevel.COMMON_ONLY, "Unsupported level: " + level);

    FileSystem fileSystem = outputPath.getFileSystem(configuration);
    Path qualifiedPath = outputPath.makeQualified(fileSystem);
    ParquetMetadata mergedFooter = mergeFooters(qualifiedPath, footers);

    if (includeFullSummary) {
        writeMetadataFile(qualifiedPath, mergedFooter, fileSystem, PARQUET_METADATA_FILE);
    }

    // The common file is written from the same merged footer, minus its blocks.
    mergedFooter.getBlocks().clear();
    writeMetadataFile(qualifiedPath, mergedFooter, fileSystem, PARQUET_COMMON_METADATA_FILE);
}
Also used : ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) FileSystem(org.apache.hadoop.fs.FileSystem)

Example 55 with ParquetMetadata

use of org.apache.parquet.hadoop.metadata.ParquetMetadata in project parquet-mr by apache.

the class ParquetFileWriter method getGlobalMetaData.

/**
 * Folds the file metadata of every footer, in list order, into a single
 * {@link GlobalMetaData} by repeatedly calling {@code mergeInto}.
 *
 * @param footers the footers whose file metadata is merged
 * @param strict passed through unchanged to {@code mergeInto}
 * @return the accumulated metadata, or {@code null} when {@code footers} is empty
 */
static GlobalMetaData getGlobalMetaData(List<Footer> footers, boolean strict) {
    // Starts as null; the first mergeInto call receives null as the accumulator.
    GlobalMetaData merged = null;
    for (Footer current : footers) {
        merged = mergeInto(current.getParquetMetadata().getFileMetaData(), merged, strict);
    }
    return merged;
}
Also used : ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) GlobalMetaData(org.apache.parquet.hadoop.metadata.GlobalMetaData)

Aggregations

ParquetMetadata (org.apache.parquet.hadoop.metadata.ParquetMetadata)76 Path (org.apache.hadoop.fs.Path)39 BlockMetaData (org.apache.parquet.hadoop.metadata.BlockMetaData)27 Configuration (org.apache.hadoop.conf.Configuration)21 MessageType (org.apache.parquet.schema.MessageType)21 ArrayList (java.util.ArrayList)19 IOException (java.io.IOException)18 Test (org.junit.Test)17 FileSystem (org.apache.hadoop.fs.FileSystem)16 Map (java.util.Map)11 FileMetaData (org.apache.parquet.hadoop.metadata.FileMetaData)11 File (java.io.File)10 FileStatus (org.apache.hadoop.fs.FileStatus)10 ColumnPath (org.apache.parquet.hadoop.metadata.ColumnPath)9 HashMap (java.util.HashMap)8 ColumnChunkMetaData (org.apache.parquet.hadoop.metadata.ColumnChunkMetaData)7 List (java.util.List)6 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)6 ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor)6 ParquetFileReader (org.apache.parquet.hadoop.ParquetFileReader)6