Use of org.apache.parquet.hadoop.metadata.CompressionCodecName in project drill by apache.
Class AsyncPageReader, method decompress().
private DrillBuf decompress(PageHeader pageHeader, DrillBuf compressedData) {
  DrillBuf pageDataBuf = null;
  Stopwatch timer = Stopwatch.createUnstarted();
  long timeToRead;
  int compressedSize = pageHeader.getCompressed_page_size();
  int uncompressedSize = pageHeader.getUncompressed_page_size();
  pageDataBuf = allocateTemporaryBuffer(uncompressedSize);
  try {
    timer.start();
    // codec recorded for this column chunk in the file footer
    CompressionCodecName codecName = parentColumnReader.columnChunkMetaData.getCodec();
    ByteBuffer input = compressedData.nioBuffer(0, compressedSize);
    ByteBuffer output = pageDataBuf.nioBuffer(0, uncompressedSize);
    // decompress the page into the pre-allocated output buffer
    DecompressionHelper decompressionHelper = new DecompressionHelper(codecName);
    decompressionHelper.decompress(input, compressedSize, output, uncompressedSize);
    pageDataBuf.writerIndex(uncompressedSize);
    timeToRead = timer.elapsed(TimeUnit.NANOSECONDS);
    this.updateStats(pageHeader, "Decompress", 0, timeToRead, compressedSize, uncompressedSize);
  } catch (IOException e) {
    handleAndThrowException(e, "Error decompressing data.");
  }
  return pageDataBuf;
}
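Drill hands the column chunk's CompressionCodecName to its own DecompressionHelper, but the enum itself already carries the mapping to concrete codecs. Below is a minimal sketch that only exercises the public CompressionCodecName API; the class name and command-line handling are invented for illustration.

import org.apache.parquet.hadoop.metadata.CompressionCodecName;

public class CodecNameProbe {
  public static void main(String[] args) {
    // Resolve a codec from a (possibly null) config string; fromConf(null) falls back to UNCOMPRESSED.
    CompressionCodecName codec = CompressionCodecName.fromConf(args.length > 0 ? args[0] : null);
    // The enum also knows the file extension and the Hadoop codec class behind each entry.
    System.out.println("codec:        " + codec);
    System.out.println("extension:    " + codec.getExtension());
    System.out.println("hadoop codec: " + codec.getHadoopCompressionCodecClassName());
  }
}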
Use of org.apache.parquet.hadoop.metadata.CompressionCodecName in project parquet-mr by apache.
Class ShowPagesCommand, method run().
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
  Preconditions.checkArgument(targets != null && targets.size() >= 1, "A Parquet file is required.");
  Preconditions.checkArgument(targets.size() == 1, "Cannot process multiple Parquet files.");
  String source = targets.get(0);
  ParquetFileReader reader = ParquetFileReader.open(getConf(), qualifiedPath(source));
  MessageType schema = reader.getFileMetaData().getSchema();
  Map<ColumnDescriptor, PrimitiveType> columns = Maps.newLinkedHashMap();
  if (this.columns == null || this.columns.isEmpty()) {
    for (ColumnDescriptor descriptor : schema.getColumns()) {
      columns.put(descriptor, primitive(schema, descriptor.getPath()));
    }
  } else {
    for (String column : this.columns) {
      columns.put(descriptor(column, schema), primitive(column, schema));
    }
  }
  CompressionCodecName codec = reader.getRowGroups().get(0).getColumns().get(0).getCodec();
  // accumulate formatted lines to print by column
  Map<String, List<String>> formatted = Maps.newLinkedHashMap();
  PageFormatter formatter = new PageFormatter();
  PageReadStore pageStore;
  int rowGroupNum = 0;
  while ((pageStore = reader.readNextRowGroup()) != null) {
    for (ColumnDescriptor descriptor : columns.keySet()) {
      List<String> lines = formatted.get(columnName(descriptor));
      if (lines == null) {
        lines = Lists.newArrayList();
        formatted.put(columnName(descriptor), lines);
      }
      formatter.setContext(rowGroupNum, columns.get(descriptor), codec);
      PageReader pages = pageStore.getPageReader(descriptor);
      DictionaryPage dict = pages.readDictionaryPage();
      if (dict != null) {
        lines.add(formatter.format(dict));
      }
      DataPage page;
      while ((page = pages.readPage()) != null) {
        lines.add(formatter.format(page));
      }
    }
    rowGroupNum += 1;
  }
  // TODO: Show total column size and overall size per value in the column summary line
  for (String columnName : formatted.keySet()) {
    console.info(String.format("\nColumn: %s\n%s", columnName, StringUtils.leftPad("", 80, '-')));
    console.info(formatter.getHeader());
    for (String line : formatted.get(columnName)) {
      console.info(line);
    }
    console.info("");
  }
  return 0;
}
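Note that the command reads the codec from the first column chunk of the first row group and reuses that one label for every page it formats; in a file whose chunks use different codecs the label can be wrong. A hedged sketch that reads the codec per column chunk from the footer instead (the class name and argument handling are invented; it relies only on ParquetFileReader, BlockMetaData and ColumnChunkMetaData):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.util.HadoopInputFile;

public class ShowChunkCodecs {
  public static void main(String[] args) throws Exception {
    Path path = new Path(args[0]);
    try (ParquetFileReader reader =
        ParquetFileReader.open(HadoopInputFile.fromPath(path, new Configuration()))) {
      int rowGroup = 0;
      for (BlockMetaData block : reader.getRowGroups()) {
        for (ColumnChunkMetaData chunk : block.getColumns()) {
          // Each chunk carries its own CompressionCodecName in the footer metadata.
          System.out.printf("row group %d, column %s: %s%n",
              rowGroup, chunk.getPath(), chunk.getCodec());
        }
        rowGroup++;
      }
    }
  }
}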
Use of org.apache.parquet.hadoop.metadata.CompressionCodecName in project parquet-mr by apache.
Class FileEncodingsIT, method getParameters().
@Parameterized.Parameters
public static Collection<Object[]> getParameters() {
  List<PrimitiveTypeName> types = Arrays.asList(PrimitiveTypeName.BOOLEAN, PrimitiveTypeName.INT32, PrimitiveTypeName.INT64, PrimitiveTypeName.INT96, PrimitiveTypeName.FLOAT, PrimitiveTypeName.DOUBLE, PrimitiveTypeName.BINARY, PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY);
  List<CompressionCodecName> codecs;
  String codecList = System.getenv("TEST_CODECS");
  if (codecList != null) {
    codecs = new ArrayList<CompressionCodecName>();
    for (String codec : codecList.split(",")) {
      codecs.add(CompressionCodecName.valueOf(codec.toUpperCase(Locale.ENGLISH)));
    }
  } else {
    // otherwise test just UNCOMPRESSED
    codecs = Arrays.asList(CompressionCodecName.UNCOMPRESSED);
  }
  System.err.println("Testing codecs: " + codecs);
  List<Object[]> parameters = new ArrayList<Object[]>();
  for (PrimitiveTypeName type : types) {
    for (CompressionCodecName codec : codecs) {
      parameters.add(new Object[] { type, codec });
    }
  }
  return parameters;
}
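The TEST_CODECS parsing above is easy to factor out. A small, hypothetical helper (the class name, trimming behaviour and empty-string default are assumptions, not part of the test) that does the same upper-case normalization and falls back to UNCOMPRESSED:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

final class CodecListParser {
  // Turn a comma-separated env value into codec names, defaulting to UNCOMPRESSED when unset.
  static List<CompressionCodecName> parse(String codecList) {
    if (codecList == null || codecList.isEmpty()) {
      return Collections.singletonList(CompressionCodecName.UNCOMPRESSED);
    }
    List<CompressionCodecName> codecs = new ArrayList<>();
    for (String codec : codecList.split(",")) {
      codecs.add(CompressionCodecName.valueOf(codec.trim().toUpperCase(Locale.ENGLISH)));
    }
    return codecs;
  }
}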
Use of org.apache.parquet.hadoop.metadata.CompressionCodecName in project parquet-mr by apache.
Class TestInputFormat, method createParquetFile().
private void createParquetFile(File file) throws IOException {
  Path path = new Path(file.toURI());
  Configuration configuration = new Configuration();
  MessageType schema = MessageTypeParser.parseMessageType("message m { required group a {required binary b;}}");
  String[] columnPath = { "a", "b" };
  ColumnDescriptor c1 = schema.getColumnDescription(columnPath);
  byte[] bytes1 = { 0, 1, 2, 3 };
  byte[] bytes2 = { 2, 3, 4, 5 };
  CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
  BinaryStatistics stats = new BinaryStatistics();
  ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
  w.start();
  w.startBlock(3);
  w.startColumn(c1, 5, codec);
  w.writeDataPage(2, 4, BytesInput.from(bytes1), stats, BIT_PACKED, BIT_PACKED, PLAIN);
  w.writeDataPage(3, 4, BytesInput.from(bytes1), stats, BIT_PACKED, BIT_PACKED, PLAIN);
  w.endColumn();
  w.endBlock();
  w.startBlock(4);
  w.startColumn(c1, 7, codec);
  w.writeDataPage(7, 4, BytesInput.from(bytes2), stats, BIT_PACKED, BIT_PACKED, PLAIN);
  w.endColumn();
  w.endBlock();
  w.end(new HashMap<String, String>());
}
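The test drives ParquetFileWriter by hand, so the UNCOMPRESSED codec is passed explicitly for each column chunk in startColumn(). With the higher-level writer API the codec is set once on the builder and applied to every chunk. A rough sketch using the example object model from parquet-hadoop; the class name, schema, output path and values are illustrative, not taken from the test:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class WriteCompressedExample {
  public static void main(String[] args) throws Exception {
    MessageType schema = MessageTypeParser.parseMessageType("message m { required binary b; }");
    SimpleGroupFactory groups = new SimpleGroupFactory(schema);
    try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(new Path(args[0]))
        .withConf(new Configuration())
        .withType(schema)
        // The codec applies to every column chunk written by this writer.
        .withCompressionCodec(CompressionCodecName.SNAPPY)
        .build()) {
      writer.write(groups.newGroup().append("b", "hello"));
    }
  }
}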