Use of org.apache.parquet.io.ParquetEncodingException in project parquet-mr by apache: the class DictionaryValuesWriter, method getBytes.
@Override
public BytesInput getBytes() {
  int maxDicId = getDictionarySize() - 1;
  LOG.debug("max dic id {}", maxDicId);
  int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId);
  int initialSlabSize = CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_INITIAL_SLAB_SIZE, maxDictionaryByteSize, 10);
  RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(bitWidth, initialSlabSize, maxDictionaryByteSize, this.allocator);
  encoders.add(encoder);
  IntIterator iterator = encodedValues.iterator();
  try {
    while (iterator.hasNext()) {
      encoder.writeInt(iterator.next());
    }
    // prepend a one-byte header that stores the bit width
    byte[] bytesHeader = new byte[] { (byte) bitWidth };
    BytesInput rleEncodedBytes = encoder.toBytes();
    LOG.debug("rle encoded bytes {}", rleEncodedBytes.size());
    BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes);
    // remember the size of the dictionary when we last wrote a page
    lastUsedDictionarySize = getDictionarySize();
    lastUsedDictionaryByteSize = dictionaryByteSize;
    return bytes;
  } catch (IOException e) {
    throw new ParquetEncodingException("could not encode the values", e);
  }
}
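The page body returned by getBytes is thus a one-byte bit-width header followed by the RLE/bit-packed dictionary ids. A minimal sketch (assuming only parquet-column's BytesUtils on the classpath; the class name BitWidthDemo is made up for illustration) of how the bit width follows from the dictionary size:

import org.apache.parquet.bytes.BytesUtils;

public class BitWidthDemo {
  public static void main(String[] args) {
    // A dictionary of 5 entries has ids 0..4, so maxDicId = 4,
    // and 4 (binary 100) needs 3 bits per encoded id.
    int dictionarySize = 5;
    int maxDicId = dictionarySize - 1;
    System.out.println(BytesUtils.getWidthFromMaxInt(maxDicId)); // prints 3
  }
}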
Use of org.apache.parquet.io.ParquetEncodingException in project parquet-mr by apache: the class PlainValuesWriter, method writeBytes.
@Override
public final void writeBytes(Binary v) {
  try {
    out.writeInt(v.length());
    v.writeTo(out);
  } catch (IOException e) {
    throw new ParquetEncodingException("could not write bytes", e);
  }
}
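Here out is a little-endian output stream, so each PLAIN-encoded BYTE_ARRAY value is a 4-byte little-endian length prefix followed by the raw bytes. A minimal sketch of that layout using only java.nio (the class name PlainLayoutDemo is made up for illustration):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;

public class PlainLayoutDemo {
  public static void main(String[] args) {
    byte[] value = "parquet".getBytes(StandardCharsets.UTF_8);
    ByteBuffer buf = ByteBuffer.allocate(4 + value.length).order(ByteOrder.LITTLE_ENDIAN);
    buf.putInt(value.length); // length prefix, as out.writeInt(v.length()) does
    buf.put(value);           // payload, as v.writeTo(out) does
    System.out.println(buf.position()); // 11 bytes total for a 7-byte value
  }
}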
Use of org.apache.parquet.io.ParquetEncodingException in project parquet-mr by apache: the class ParquetFileWriter, method mergeFooters.
static ParquetMetadata mergeFooters(Path root, List<Footer> footers) {
  String rootPath = root.toUri().getPath();
  GlobalMetaData fileMetaData = null;
  List<BlockMetaData> blocks = new ArrayList<BlockMetaData>();
  for (Footer footer : footers) {
    String footerPath = footer.getFile().toUri().getPath();
    if (!footerPath.startsWith(rootPath)) {
      throw new ParquetEncodingException(footerPath + " invalid: all the files must be contained in the root " + root);
    }
    footerPath = footerPath.substring(rootPath.length());
    while (footerPath.startsWith("/")) {
      footerPath = footerPath.substring(1);
    }
    fileMetaData = mergeInto(footer.getParquetMetadata().getFileMetaData(), fileMetaData);
    for (BlockMetaData block : footer.getParquetMetadata().getBlocks()) {
      block.setPath(footerPath);
      blocks.add(block);
    }
  }
  return new ParquetMetadata(fileMetaData.merge(), blocks);
}
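The loop relativizes every footer path against the root before recording it on the block metadata, and rejects paths outside the root. A standalone sketch of just that relativization step (RelativizeDemo and its helper are hypothetical, using plain strings instead of Hadoop Path objects):

public class RelativizeDemo {
  static String relativize(String rootPath, String footerPath) {
    if (!footerPath.startsWith(rootPath)) {
      throw new IllegalArgumentException(footerPath + " is not under " + rootPath);
    }
    String rest = footerPath.substring(rootPath.length());
    while (rest.startsWith("/")) {
      rest = rest.substring(1);
    }
    return rest;
  }

  public static void main(String[] args) {
    System.out.println(relativize("/data/table", "/data/table/part-0/file.parquet"));
    // prints: part-0/file.parquet
  }
}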
Use of org.apache.parquet.io.ParquetEncodingException in project parquet-mr by apache: the class MemPageWriter, method writePage.
@Override
public void writePage(BytesInput bytesInput, int valueCount, Statistics statistics, Encoding rlEncoding, Encoding dlEncoding, Encoding valuesEncoding) throws IOException {
  if (valueCount == 0) {
    throw new ParquetEncodingException("illegal page of 0 values");
  }
  memSize += bytesInput.size();
  pages.add(new DataPageV1(BytesInput.copy(bytesInput), valueCount, (int) bytesInput.size(), statistics, rlEncoding, dlEncoding, valuesEncoding));
  totalValueCount += valueCount;
  LOG.debug("page written for {} bytes and {} records", bytesInput.size(), valueCount);
}
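Note that the page bytes are retained via BytesInput.copy rather than by reference: a BytesInput may wrap a buffer the caller reuses for the next page. A minimal sketch (plain byte arrays, no Parquet types; CopyDemo is made up for illustration) of why the defensive copy matters:

import java.util.ArrayList;
import java.util.List;

public class CopyDemo {
  public static void main(String[] args) {
    byte[] reused = {1, 2, 3};
    List<byte[]> pages = new ArrayList<>();
    pages.add(reused.clone()); // analogous to BytesInput.copy(bytesInput)
    reused[0] = 9;             // caller overwrites the buffer for the next page
    System.out.println(pages.get(0)[0]); // prints 1: the stored page is unaffected
  }
}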
Use of org.apache.parquet.io.ParquetEncodingException in project parquet-mr by apache: the class DataWritableWriter, method writeData.
private void writeData(final ArrayWritable arr, final GroupType type) {
  if (arr == null) {
    return;
  }
  final int fieldCount = type.getFieldCount();
  Writable[] values = arr.get();
  for (int field = 0; field < fieldCount; ++field) {
    final Type fieldType = type.getType(field);
    final String fieldName = fieldType.getName();
    final Writable value = values[field];
    if (value == null) {
      continue;
    }
    recordConsumer.startField(fieldName, field);
    if (fieldType.isPrimitive()) {
      writePrimitive(value);
    } else {
      recordConsumer.startGroup();
      if (value instanceof ArrayWritable) {
        if (fieldType.asGroupType().getRepetition().equals(Type.Repetition.REPEATED)) {
          writeArray((ArrayWritable) value, fieldType.asGroupType());
        } else {
          writeData((ArrayWritable) value, fieldType.asGroupType());
        }
      } else if (value != null) {
        throw new ParquetEncodingException("This should be an ArrayWritable or MapWritable: " + value);
      }
      recordConsumer.endGroup();
    }
    recordConsumer.endField(fieldName, field);
  }
}
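The method follows the RecordConsumer bracketing contract: every startField is matched by endField, and non-primitive values are wrapped in startGroup/endGroup before recursing. A runnable sketch (assuming parquet-column on the classpath; LoggingConsumer and BracketingDemo are made up for illustration) that prints the event sequence this discipline produces:

import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.RecordConsumer;

public class BracketingDemo {
  // A logging stand-in; the real consumer is supplied by the Parquet write path.
  static class LoggingConsumer extends RecordConsumer {
    @Override public void startMessage() { System.out.println("startMessage"); }
    @Override public void endMessage() { System.out.println("endMessage"); }
    @Override public void startField(String f, int i) { System.out.println("startField " + f); }
    @Override public void endField(String f, int i) { System.out.println("endField " + f); }
    @Override public void startGroup() { System.out.println("startGroup"); }
    @Override public void endGroup() { System.out.println("endGroup"); }
    @Override public void addInteger(int v) { System.out.println("addInteger " + v); }
    @Override public void addLong(long v) { }
    @Override public void addBoolean(boolean v) { }
    @Override public void addBinary(Binary v) { }
    @Override public void addFloat(float v) { }
    @Override public void addDouble(double v) { }
  }

  public static void main(String[] args) {
    RecordConsumer rc = new LoggingConsumer();
    rc.startMessage();
    rc.startField("inner", 0);
    rc.startGroup();           // non-primitive field: bracket with a group
    rc.startField("x", 0);
    rc.addInteger(42);
    rc.endField("x", 0);
    rc.endGroup();
    rc.endField("inner", 0);
    rc.endMessage();
  }
}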