Search in sources :

Example 1 with ZSTD

use of com.facebook.presto.orc.metadata.CompressionKind.ZSTD in project presto by prestodb.

the class OrcTester method assertRoundTrip.

private void assertRoundTrip(List<Type> writeTypes, List<Type> readTypes, List<List<?>> writeValues, List<List<?>> readValues, boolean verifyWithHiveReader, List<OrcReaderSettings> settings) throws Exception {
    assertEquals(writeTypes.size(), readTypes.size());
    assertEquals(writeTypes.size(), writeValues.size());
    assertEquals(writeTypes.size(), readValues.size());
    OrcWriterStats stats = new OrcWriterStats();
    for (Format format : formats) {
        if (!readTypes.stream().allMatch(readType -> format.supportsType(readType))) {
            return;
        }
        OrcEncoding orcEncoding = format.getOrcEncoding();
        for (CompressionKind compression : compressions) {
            boolean hiveSupported = (compression != LZ4) && (compression != ZSTD);
            // write Hive, read Presto
            if (hiveSupported) {
                try (TempFile tempFile = new TempFile()) {
                    writeOrcColumnsHive(tempFile.getFile(), format, compression, writeTypes, writeValues);
                    assertFileContentsPresto(readTypes, tempFile, readValues, false, false, orcEncoding, format, true, useSelectiveOrcReader, settings, ImmutableMap.of());
                }
            }
            // write Presto, read Hive and Presto
            try (TempFile tempFile = new TempFile()) {
                writeOrcColumnsPresto(tempFile.getFile(), format, compression, Optional.empty(), writeTypes, writeValues, stats);
                if (verifyWithHiveReader && hiveSupported) {
                    assertFileContentsHive(readTypes, tempFile, format, readValues);
                }
                assertFileContentsPresto(readTypes, tempFile, readValues, false, false, orcEncoding, format, false, useSelectiveOrcReader, settings, ImmutableMap.of());
                if (skipBatchTestsEnabled) {
                    assertFileContentsPresto(readTypes, tempFile, readValues, true, false, orcEncoding, format, false, useSelectiveOrcReader, settings, ImmutableMap.of());
                }
                if (skipStripeTestsEnabled) {
                    assertFileContentsPresto(readTypes, tempFile, readValues, false, true, orcEncoding, format, false, useSelectiveOrcReader, settings, ImmutableMap.of());
                }
            }
            // write presto read presto
            if (dwrfEncryptionEnabled && format == DWRF) {
                try (TempFile tempFile = new TempFile()) {
                    DwrfWriterEncryption dwrfWriterEncryption = generateWriterEncryption();
                    writeOrcColumnsPresto(tempFile.getFile(), format, compression, Optional.of(dwrfWriterEncryption), writeTypes, writeValues, stats);
                    ImmutableMap.Builder<Integer, Slice> intermediateKeysBuilder = ImmutableMap.builder();
                    for (int i = 0; i < dwrfWriterEncryption.getWriterEncryptionGroups().size(); i++) {
                        for (Integer node : dwrfWriterEncryption.getWriterEncryptionGroups().get(i).getNodes()) {
                            intermediateKeysBuilder.put(node, dwrfWriterEncryption.getWriterEncryptionGroups().get(i).getIntermediateKeyMetadata());
                        }
                    }
                    Map<Integer, Slice> intermediateKeysMap = intermediateKeysBuilder.build();
                    assertFileContentsPresto(readTypes, tempFile, readValues, false, false, orcEncoding, format, false, useSelectiveOrcReader, settings, intermediateKeysMap);
                    if (skipBatchTestsEnabled) {
                        assertFileContentsPresto(readTypes, tempFile, readValues, true, false, orcEncoding, format, false, useSelectiveOrcReader, settings, intermediateKeysMap);
                    }
                    if (skipStripeTestsEnabled) {
                        assertFileContentsPresto(readTypes, tempFile, readValues, false, true, orcEncoding, format, false, useSelectiveOrcReader, settings, intermediateKeysMap);
                    }
                }
            }
        }
    }
    assertEquals(stats.getWriterSizeInBytes(), 0);
}
Also used : OrcUtil(org.apache.hadoop.hive.ql.io.orc.OrcUtil) Page(com.facebook.presto.common.Page) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) ZonedDateTime(java.time.ZonedDateTime) PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) Text(org.apache.hadoop.io.Text) ORC_11(com.facebook.presto.orc.OrcTester.Format.ORC_11) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) Writable(org.apache.hadoop.io.Writable) ORC_12(com.facebook.presto.orc.OrcTester.Format.ORC_12) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) Unit(io.airlift.units.DataSize.Unit) PrimitiveObjectInspectorFactory.javaDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDateObjectInspector) HIVE_ORC_DICTIONARY_ENCODING_INTERVAL(com.facebook.hive.orc.OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) OrcStruct(org.apache.hadoop.hive.ql.io.orc.OrcStruct) BigDecimal(java.math.BigDecimal) Chars.truncateToLengthAndTrimSpaces(com.facebook.presto.common.type.Chars.truncateToLengthAndTrimSpaces) Arrays.asList(java.util.Arrays.asList) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation) BigInteger(java.math.BigInteger) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Assert.assertFalse(org.testng.Assert.assertFalse) IntWritable(org.apache.hadoop.io.IntWritable) PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) BlockBuilder(com.facebook.presto.common.block.BlockBuilder) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) Footer(com.facebook.presto.orc.metadata.Footer) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) READ_ALL_COLUMNS(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS) ReaderOptions(org.apache.hadoop.hive.ql.io.orc.OrcFile.ReaderOptions) ZoneId(java.time.ZoneId) UncheckedIOException(java.io.UncheckedIOException) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) BooleanWritable(org.apache.hadoop.io.BooleanWritable) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) SqlTimestamp(com.facebook.presto.common.type.SqlTimestamp) Decimals.rescale(com.facebook.presto.common.type.Decimals.rescale) IS_NOT_NULL(com.facebook.presto.common.predicate.TupleDomainFilter.IS_NOT_NULL) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) IS_NULL(com.facebook.presto.common.predicate.TupleDomainFilter.IS_NULL) OutputStreamDataSink(com.facebook.presto.common.io.OutputStreamDataSink) OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) HIVE_ORC_COMPRESSION(com.facebook.hive.orc.OrcConf.ConfVars.HIVE_ORC_COMPRESSION) TestBigintRange(com.facebook.presto.orc.TrackingTupleDomainFilter.TestBigintRange) JavaHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveCharObjectInspector) Iterables(com.google.common.collect.Iterables) StandardTypes(com.facebook.presto.common.type.StandardTypes) DecimalType(com.facebook.presto.common.type.DecimalType) Slice(io.airlift.slice.Slice) TypeSignatureParameter(com.facebook.presto.common.type.TypeSignatureParameter) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) DataSize.succinctBytes(io.airlift.units.DataSize.succinctBytes) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) OrcLazyObject(com.facebook.hive.orc.lazy.OrcLazyObject) FunctionAndTypeManager.createTestFunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) UNKNOWN(com.facebook.presto.orc.metadata.KeyProvider.UNKNOWN) SqlDate(com.facebook.presto.common.type.SqlDate) Lists(com.google.common.collect.Lists) PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) HIVE_ORC_BUILD_STRIDE_DICTIONARY(com.facebook.hive.orc.OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY) SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ArrayType(com.facebook.presto.common.type.ArrayType) CharType(com.facebook.presto.common.type.CharType) MAX_BATCH_SIZE(com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE) Functions(com.google.common.base.Functions) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) Properties(java.util.Properties) NOOP_ORC_LOCAL_MEMORY_CONTEXT(com.facebook.presto.orc.NoopOrcLocalMemoryContext.NOOP_ORC_LOCAL_MEMORY_CONTEXT) SharedBuffer(com.facebook.presto.orc.stream.SharedBuffer) AbstractIterator(com.google.common.collect.AbstractIterator) FileOutputStream(java.io.FileOutputStream) SNAPPY(com.facebook.presto.orc.metadata.CompressionKind.SNAPPY) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) Field(java.lang.reflect.Field) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) File(java.io.File) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) LZ4(com.facebook.presto.orc.metadata.CompressionKind.LZ4) FloatWritable(org.apache.hadoop.io.FloatWritable) RowType(com.facebook.presto.common.type.RowType) ZSTD(com.facebook.presto.orc.metadata.CompressionKind.ZSTD) FunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) OrcFile(org.apache.hadoop.hive.ql.io.orc.OrcFile) BOTH(com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationMode.BOTH) BigintRange(com.facebook.presto.common.predicate.TupleDomainFilter.BigintRange) LongWritable(org.apache.hadoop.io.LongWritable) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) TestDoubleRange(com.facebook.presto.orc.TrackingTupleDomainFilter.TestDoubleRange) OrcConf(org.apache.orc.OrcConf) SESSION(com.facebook.presto.testing.TestingConnectorSession.SESSION) Path(org.apache.hadoop.fs.Path) Reader(org.apache.hadoop.hive.ql.io.orc.Reader) RuntimeStats(com.facebook.presto.common.RuntimeStats) SqlDecimal(com.facebook.presto.common.type.SqlDecimal) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) NONE(com.facebook.presto.orc.metadata.CompressionKind.NONE) Timestamp(java.sql.Timestamp) VarcharType(com.facebook.presto.common.type.VarcharType) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) NOOP_ORC_AGGREGATED_MEMORY_CONTEXT(com.facebook.presto.orc.NoopOrcAggregatedMemoryContext.NOOP_ORC_AGGREGATED_MEMORY_CONTEXT) Objects(java.util.Objects) DataSize(io.airlift.units.DataSize) List(java.util.List) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) LocalDate(java.time.LocalDate) Entry(java.util.Map.Entry) VarbinaryType(com.facebook.presto.common.type.VarbinaryType) Optional(java.util.Optional) OrcDecompressor.createOrcDecompressor(com.facebook.presto.orc.OrcDecompressor.createOrcDecompressor) READ_COLUMN_IDS_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR) OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) IntStream(java.util.stream.IntStream) TypeInfoFactory.getCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo) MapType(com.facebook.presto.common.type.MapType) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) DateTimeTestingUtils.sqlTimestampOf(com.facebook.presto.testing.DateTimeTestingUtils.sqlTimestampOf) Assert.assertEquals(org.testng.Assert.assertEquals) HashMap(java.util.HashMap) HIVE_ORC_ENTROPY_STRING_THRESHOLD(com.facebook.hive.orc.OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD) DoubleWritable(org.apache.hadoop.io.DoubleWritable) Function(java.util.function.Function) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) ZLIB(com.facebook.presto.orc.metadata.CompressionKind.ZLIB) Varchars.truncateToLength(com.facebook.presto.common.type.Varchars.truncateToLength) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) FilterFunction(com.facebook.presto.common.predicate.FilterFunction) ByteWritable(org.apache.hadoop.io.ByteWritable) Objects.requireNonNull(java.util.Objects.requireNonNull) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Type(com.facebook.presto.common.type.Type) NamedTypeSignature(com.facebook.presto.common.type.NamedTypeSignature) Iterator(java.util.Iterator) Assert.fail(org.testng.Assert.fail) NO_ENCRYPTION(com.facebook.presto.orc.DwrfEncryptionProvider.NO_ENCRYPTION) Decimals(com.facebook.presto.common.type.Decimals) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) Maps(com.google.common.collect.Maps) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) Date(java.sql.Date) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter) JobConf(org.apache.hadoop.mapred.JobConf) DWRF(com.facebook.presto.orc.OrcTester.Format.DWRF) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) Serializer(org.apache.hadoop.hive.serde2.Serializer) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) TestingOrcPredicate.createOrcPredicate(com.facebook.presto.orc.TestingOrcPredicate.createOrcPredicate) Assert.assertTrue(org.testng.Assert.assertTrue) RowFieldName(com.facebook.presto.common.type.RowFieldName) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) Block(com.facebook.presto.common.block.Block) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) DoubleRange(com.facebook.presto.common.predicate.TupleDomainFilter.DoubleRange) SECONDS(java.util.concurrent.TimeUnit.SECONDS) InputStream(java.io.InputStream) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) BigInteger(java.math.BigInteger) OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice)

Aggregations

HIVE_ORC_BUILD_STRIDE_DICTIONARY (com.facebook.hive.orc.OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY)1 HIVE_ORC_COMPRESSION (com.facebook.hive.orc.OrcConf.ConfVars.HIVE_ORC_COMPRESSION)1 HIVE_ORC_DICTIONARY_ENCODING_INTERVAL (com.facebook.hive.orc.OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL)1 HIVE_ORC_ENTROPY_STRING_THRESHOLD (com.facebook.hive.orc.OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD)1 OrcLazyObject (com.facebook.hive.orc.lazy.OrcLazyObject)1 Page (com.facebook.presto.common.Page)1 RuntimeStats (com.facebook.presto.common.RuntimeStats)1 Subfield (com.facebook.presto.common.Subfield)1 Block (com.facebook.presto.common.block.Block)1 BlockBuilder (com.facebook.presto.common.block.BlockBuilder)1 OutputStreamDataSink (com.facebook.presto.common.io.OutputStreamDataSink)1 FilterFunction (com.facebook.presto.common.predicate.FilterFunction)1 TupleDomainFilter (com.facebook.presto.common.predicate.TupleDomainFilter)1 BigintRange (com.facebook.presto.common.predicate.TupleDomainFilter.BigintRange)1 DoubleRange (com.facebook.presto.common.predicate.TupleDomainFilter.DoubleRange)1 IS_NOT_NULL (com.facebook.presto.common.predicate.TupleDomainFilter.IS_NOT_NULL)1 IS_NULL (com.facebook.presto.common.predicate.TupleDomainFilter.IS_NULL)1 ArrayType (com.facebook.presto.common.type.ArrayType)1 BIGINT (com.facebook.presto.common.type.BigintType.BIGINT)1 BOOLEAN (com.facebook.presto.common.type.BooleanType.BOOLEAN)1