Use of org.apache.parquet.hadoop.util.ColumnMasker.MaskMode in the parquet-mr project by Apache.
The run method of the class ColumnMaskingCommand.
/**
 * Copies the input Parquet file to the output path, delegating the per-block
 * work to {@code masker.processBlocks} for the configured columns.
 *
 * <p>The arguments are validated first: {@code mode} must be {@code "nullify"},
 * {@code input} and {@code output} must both be set, and {@code cols} must be
 * non-empty. The footer of the input file is then read, an output writer is
 * created for the same schema in {@code CREATE} mode, and the blocks are
 * processed. The writer is always ended with the input file's key/value
 * metadata, whether or not processing succeeded.
 *
 * <p>If processing fails and ending the writer fails as well, the processing
 * failure is the one that propagates; the writer failure is attached to it as
 * a suppressed exception so the root cause is not lost.
 *
 * @return {@code 0} when the file has been processed and the writer ended
 * @throws IOException if reading the input, processing the blocks, or
 *         finishing the output file fails
 */
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
  Preconditions.checkArgument(mode != null && (mode.equals("nullify")), "mask mode cannot be null and can be only nullify");
  Preconditions.checkArgument(input != null && output != null, "Both input and output parquet file paths are required.");
  Preconditions.checkArgument(cols != null && cols.size() > 0, "columns cannot be null or empty");

  MaskMode maskMode = MaskMode.fromString(mode);
  Path inPath = new Path(input);
  Path outPath = new Path(output);

  // The footer supplies both the schema for the writer and the key/value
  // metadata that is carried over to the output file.
  ParquetMetadata metaData = ParquetFileReader.readFooter(getConf(), inPath, NO_FILTER);
  MessageType schema = metaData.getFileMetaData().getSchema();

  ParquetFileWriter writer = new ParquetFileWriter(getConf(), schema, outPath, ParquetFileWriter.Mode.CREATE);
  writer.start();

  // Remembers the first failure so that a follow-up failure in writer.end()
  // cannot replace it.
  Throwable primary = null;
  try (TransParquetFileReader reader = new TransParquetFileReader(HadoopInputFile.fromPath(inPath, getConf()), HadoopReadOptions.builder(getConf()).build())) {
    masker.processBlocks(reader, writer, metaData, schema, cols, maskMode);
  } catch (IOException | RuntimeException e) {
    primary = e;
    throw e;
  } finally {
    try {
      // Always end the writer, exactly as before, so it is finalised on every path.
      writer.end(metaData.getFileMetaData().getKeyValueMetaData());
    } catch (IOException | RuntimeException endFailure) {
      if (primary == null) {
        throw endFailure;
      }
      // Processing already failed: keep that exception as the one thrown.
      primary.addSuppressed(endFailure);
    }
  }
  return 0;
}
Aggregations