use of org.apache.parquet.thrift.test.RequiredPrimitiveFixture in project parquet-mr by apache.
the class TestThriftToParquetFileWriter method testWriteStatistics.
/**
 * Writes two sets of {@code RequiredPrimitiveFixture} rows through {@code createFile} and checks
 * the min/max statistics stored in each file's footer against hand-built expected statistics.
 *
 * <p>The first set uses small values; the second uses the extreme values of int, long and double
 * plus longer strings. Column paths are compared with {@code equals}: comparing the result of
 * {@code toString()} with {@code ==} is a reference comparison that never matches a literal, which
 * would silently skip the guarded assertions.
 *
 * @throws Exception if writing the files or reading their footers fails
 */
@Test
public void testWriteStatistics() throws Exception {
  // expected statistics for the small-value rows
  IntStatistics intStatsSmall = new IntStatistics();
  intStatsSmall.setMinMax(2, 100);
  LongStatistics longStatsSmall = new LongStatistics();
  longStatsSmall.setMinMax(-17L, 287L);
  DoubleStatistics doubleStatsSmall = new DoubleStatistics();
  doubleStatsSmall.setMinMax(-15.55d, 9.63d);
  BinaryStatistics binaryStatsSmall = new BinaryStatistics();
  binaryStatsSmall.setMinMax(Binary.fromString("as"), Binary.fromString("world"));
  BooleanStatistics boolStats = new BooleanStatistics();
  boolStats.setMinMax(false, true);

  // write the small-value rows to a file
  Path p = createFile(
      new RequiredPrimitiveFixture(false, (byte) 32, (short) 32, 2, 90L, -15.55d, "as"),
      new RequiredPrimitiveFixture(false, (byte) 100, (short) 100, 100, 287L, -9.0d, "world"),
      new RequiredPrimitiveFixture(true, (byte) 2, (short) 2, 9, -17L, 9.63d, "hello"));
  final Configuration configuration = new Configuration();
  configuration.setBoolean("parquet.strings.signed-min-max.enabled", true);

  ParquetMetadata footer = ParquetFileReader.readFooter(configuration, p);
  for (BlockMetaData bmd : footer.getBlocks()) {
    for (ColumnChunkMetaData cmd : bmd.getColumns()) {
      switch (cmd.getType()) {
        case INT32:
          // every INT32 column (byte, short and int fields) spans 2..100 in these rows
          TestUtils.assertStatsValuesEqual(intStatsSmall, cmd.getStatistics());
          break;
        case INT64:
          TestUtils.assertStatsValuesEqual(longStatsSmall, cmd.getStatistics());
          break;
        case DOUBLE:
          TestUtils.assertStatsValuesEqual(doubleStatsSmall, cmd.getStatistics());
          break;
        case BOOLEAN:
          TestUtils.assertStatsValuesEqual(boolStats, cmd.getStatistics());
          break;
        case BINARY:
          // there is also info_string that has no statistics
          if ("[test_string]".equals(cmd.getPath().toString())) {
            TestUtils.assertStatsValuesEqual(binaryStatsSmall, cmd.getStatistics());
          }
          break;
        default:
          // no other primitive types are checked by this test
          break;
      }
    }
  }

  // expected statistics for the large-value rows
  IntStatistics intStatsLarge = new IntStatistics();
  intStatsLarge.setMinMax(-Integer.MAX_VALUE, Integer.MAX_VALUE);
  LongStatistics longStatsLarge = new LongStatistics();
  longStatsLarge.setMinMax(-Long.MAX_VALUE, Long.MAX_VALUE);
  DoubleStatistics doubleStatsLarge = new DoubleStatistics();
  doubleStatsLarge.setMinMax(-Double.MAX_VALUE, Double.MAX_VALUE);
  BinaryStatistics binaryStatsLarge = new BinaryStatistics();
  // The third row below writes "hello", which sorts before "some small string",
  // so "hello" is the smallest test_string value in this file.
  binaryStatsLarge.setMinMax(
      Binary.fromString("hello"),
      Binary.fromString("some very large string here to test in this function"));

  // write the large-value rows to a second file
  Path largeFile = createFile(
      new RequiredPrimitiveFixture(false, (byte) 2, (short) 32, -Integer.MAX_VALUE, -Long.MAX_VALUE, -Double.MAX_VALUE, "some small string"),
      new RequiredPrimitiveFixture(false, (byte) 100, (short) 100, Integer.MAX_VALUE, Long.MAX_VALUE, Double.MAX_VALUE, "some very large string here to test in this function"),
      new RequiredPrimitiveFixture(true, (byte) 2, (short) 2, 9, -17L, 9.63d, "hello"));

  // separate configuration for the second file; the flag must be set on THIS instance
  final Configuration largeConfiguration = new Configuration();
  largeConfiguration.setBoolean("parquet.strings.signed-min-max.enabled", true);

  ParquetMetadata largeFooter = ParquetFileReader.readFooter(largeConfiguration, largeFile);
  for (BlockMetaData bmd : largeFooter.getBlocks()) {
    for (ColumnChunkMetaData cmd : bmd.getColumns()) {
      switch (cmd.getType()) {
        case INT32:
          // testing the correct limits of an int32, there are also byte and short, tested earlier
          if ("[test_i32]".equals(cmd.getPath().toString())) {
            TestUtils.assertStatsValuesEqual(intStatsLarge, cmd.getStatistics());
          }
          break;
        case INT64:
          TestUtils.assertStatsValuesEqual(longStatsLarge, cmd.getStatistics());
          break;
        case DOUBLE:
          TestUtils.assertStatsValuesEqual(doubleStatsLarge, cmd.getStatistics());
          break;
        case BOOLEAN:
          TestUtils.assertStatsValuesEqual(boolStats, cmd.getStatistics());
          break;
        case BINARY:
          // there is also info_string that has no statistics
          if ("[test_string]".equals(cmd.getPath().toString())) {
            TestUtils.assertStatsValuesEqual(binaryStatsLarge, cmd.getStatistics());
          }
          break;
        default:
          // no other primitive types are checked by this test
          break;
      }
    }
  }
}
use of org.apache.parquet.thrift.test.RequiredPrimitiveFixture in project parquet-mr by apache.
the class ParquetScroogeSchemeTest method testWritePrimitveThriftReadScrooge.
/**
 * Builds one Thrift {@code RequiredPrimitiveFixture} with its info string set and passes it,
 * together with the Scrooge fixture class and an expected text rendering, to
 * {@code verifyScroogeRead}.
 *
 * @throws Exception if {@code verifyScroogeRead} or record creation fails
 */
@Test
public void testWritePrimitveThriftReadScrooge() throws Exception {
  final RequiredPrimitiveFixture fixture =
      new RequiredPrimitiveFixture(true, (byte) 2, (short) 3, 4, 5L, 6.0, "7");
  fixture.setInfo_string("it's info");

  // presumably the textual form of the record as read back through Scrooge — confirm against verifyScroogeRead
  final String expectedText = "RequiredPrimitiveFixture(true,2,3,4,5,6.0,7,Some(it's info))\n";

  verifyScroogeRead(
      thriftRecords(fixture),
      org.apache.parquet.scrooge.test.RequiredPrimitiveFixture.class,
      expectedText,
      "**");
}
Aggregations