Use of org.apache.parquet.column.page.DictionaryPageReadStore in the Apache parquet-mr project.
Class ShowDictionaryCommand, method run.
/**
 * Prints the dictionary of the configured column for each row group of one Parquet file.
 *
 * <p>Exactly one target must be supplied; both conditions are enforced through
 * {@code Preconditions.checkArgument}. For every row group the dictionary page of
 * {@code column} is read and each entry is logged as {@code index: value}. A row group
 * whose column chunk has no dictionary page is reported and skipped instead of failing.
 *
 * @return 0 after all row groups have been processed
 * @throws IOException if the file cannot be opened or read
 * @throws IllegalArgumentException if the column's primitive type is not BINARY, INT32,
 *     INT64, FLOAT or DOUBLE
 */
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
  Preconditions.checkArgument(targets != null && targets.size() >= 1, "A Parquet file is required.");
  Preconditions.checkArgument(targets.size() == 1, "Cannot process multiple Parquet files.");
  String source = targets.get(0);

  // try-with-resources: the reader holds an open file handle and must be released
  // on every exit path, including the exception thrown for unsupported types below.
  try (ParquetFileReader reader = ParquetFileReader.open(getConf(), qualifiedPath(source))) {
    MessageType schema = reader.getFileMetaData().getSchema();
    ColumnDescriptor descriptor = Util.descriptor(column, schema);
    PrimitiveType type = Util.primitive(column, schema);
    Preconditions.checkNotNull(type);

    DictionaryPageReadStore dictionaryReader;
    int rowGroup = 0;
    while ((dictionaryReader = reader.getNextDictionaryReader()) != null) {
      DictionaryPage page = dictionaryReader.readDictionaryPage(descriptor);
      if (page == null) {
        // The column chunk in this row group is not dictionary-encoded.
        console.info("\nRow group {} has no dictionary for \"{}\"", rowGroup, column);
      } else {
        Dictionary dict = page.getEncoding().initDictionary(descriptor, page);
        console.info("\nRow group {} dictionary for \"{}\":", rowGroup, column);
        for (int i = 0; i <= dict.getMaxId(); i += 1) {
          // Right-aligned entry index, shared by every branch of the switch.
          String index = String.format("%6d", i);
          switch (type.getPrimitiveTypeName()) {
            case BINARY:
              if (type.getOriginalType() == OriginalType.UTF8) {
                console.info("{}: {}", index, Util.humanReadable(dict.decodeToBinary(i).toStringUsingUTF8(), 70));
              } else {
                console.info("{}: {}", index, Util.humanReadable(dict.decodeToBinary(i).getBytesUnsafe(), 70));
              }
              break;
            case INT32:
              console.info("{}: {}", index, dict.decodeToInt(i));
              break;
            case INT64:
              console.info("{}: {}", index, dict.decodeToLong(i));
              break;
            case FLOAT:
              console.info("{}: {}", index, dict.decodeToFloat(i));
              break;
            case DOUBLE:
              console.info("{}: {}", index, dict.decodeToDouble(i));
              break;
            default:
              throw new IllegalArgumentException("Unknown dictionary type: " + type.getPrimitiveTypeName());
          }
        }
      }
      // Advance past the row group whose dictionary was just inspected.
      reader.skipNextRowGroup();
      rowGroup += 1;
    }
  }

  console.info("");
  return 0;
}
Use of org.apache.parquet.column.page.DictionaryPageReadStore in the Apache parquet-mr project.
Class DictionaryFilterTest, method testColumnWithoutDictionary.
/**
 * Asserts that {@code canDrop} returns false for six comparison predicates on the
 * {@code plain_int32_field} column, and that the mocked dictionary store is never used.
 */
@Test
public void testColumnWithoutDictionary() throws Exception {
  final String neverDrop = "Should never drop block using plain encoding";
  final int belowRange = -10;
  final int aboveRange = nElements + 10;

  IntColumn plainInts = intColumn("plain_int32_field");
  DictionaryPageReadStore mockedStore = mock(DictionaryPageReadStore.class);

  // Predicates against a value below the tested range.
  assertFalse(neverDrop, canDrop(eq(plainInts, belowRange), ccmd, mockedStore));
  assertFalse(neverDrop, canDrop(lt(plainInts, belowRange), ccmd, mockedStore));
  assertFalse(neverDrop, canDrop(ltEq(plainInts, belowRange), ccmd, mockedStore));

  // Predicates against a value above the tested range.
  assertFalse(neverDrop, canDrop(gt(plainInts, aboveRange), ccmd, mockedStore));
  assertFalse(neverDrop, canDrop(gtEq(plainInts, aboveRange), ccmd, mockedStore));
  assertFalse(neverDrop, canDrop(notEq(plainInts, aboveRange), ccmd, mockedStore));

  // None of the checks above may have consulted the dictionary store.
  verifyZeroInteractions(mockedStore);
}
Use of org.apache.parquet.column.page.DictionaryPageReadStore in the Apache parquet-mr project.
Class DictionaryFilterTest, method testColumnWithDictionaryAndPlainEncodings.
/**
 * Asserts that {@code canDrop} returns false for six comparison predicates on the
 * {@code fallback_binary_field} column, and that the mocked dictionary store is never used.
 */
@Test
public void testColumnWithDictionaryAndPlainEncodings() throws Exception {
  final String neverDrop = "Should never drop block using plain encoding";
  final int belowRange = -10;
  final int aboveRange = nElements + 10;

  // NOTE(review): the field name suggests a binary column, yet it is wrapped as an
  // IntColumn — presumably harmless because no dictionary values are ever compared
  // (the store must stay untouched); confirm this is intentional.
  IntColumn fallbackColumn = intColumn("fallback_binary_field");
  DictionaryPageReadStore mockedStore = mock(DictionaryPageReadStore.class);

  // Predicates against a value below the tested range.
  assertFalse(neverDrop, canDrop(eq(fallbackColumn, belowRange), ccmd, mockedStore));
  assertFalse(neverDrop, canDrop(lt(fallbackColumn, belowRange), ccmd, mockedStore));
  assertFalse(neverDrop, canDrop(ltEq(fallbackColumn, belowRange), ccmd, mockedStore));

  // Predicates against a value above the tested range.
  assertFalse(neverDrop, canDrop(gt(fallbackColumn, aboveRange), ccmd, mockedStore));
  assertFalse(neverDrop, canDrop(gtEq(fallbackColumn, aboveRange), ccmd, mockedStore));
  assertFalse(neverDrop, canDrop(notEq(fallbackColumn, aboveRange), ccmd, mockedStore));

  // None of the checks above may have consulted the dictionary store.
  verifyZeroInteractions(mockedStore);
}
Aggregations