Search in sources :

Example 1 with MAP

use of com.facebook.presto.common.type.StandardTypes.MAP in project presto by prestodb.

the class RcFileTester method testMapRoundTrip.

private void testMapRoundTrip(Type type, Iterable<?> writeValues, Set<Format> skipFormats) throws Exception {
    // json does not support null keys, so we just write the first value
    Object nullKeyWrite = Iterables.getFirst(writeValues, null);
    // values in simple map and mix in some null values
    testRoundTripType(createMapType(type), transform(insertNullEvery(5, writeValues), value -> toHiveMap(nullKeyWrite, value)), skipFormats);
}
Also used : SnappyCodec(org.apache.hadoop.io.compress.SnappyCodec) Page(com.facebook.presto.common.Page) DateTimeZone(org.joda.time.DateTimeZone) LZ4(com.facebook.presto.rcfile.RcFileTester.Compression.LZ4) ZLIB(com.facebook.presto.rcfile.RcFileTester.Compression.ZLIB) ZonedDateTime(java.time.ZonedDateTime) PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) Text(org.apache.hadoop.io.Text) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) Writable(org.apache.hadoop.io.Writable) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) ROW(com.facebook.presto.common.type.StandardTypes.ROW) PrimitiveObjectInspectorFactory.javaDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDateObjectInspector) FileSplit(org.apache.hadoop.mapred.FileSplit) RCFileInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) BigInteger(java.math.BigInteger) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Assert.assertFalse(org.testng.Assert.assertFalse) LazyBinaryArray(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray) IntWritable(org.apache.hadoop.io.IntWritable) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) BlockBuilder(com.facebook.presto.common.block.BlockBuilder) LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) LazyArray(org.apache.hadoop.hive.serde2.lazy.LazyArray) Set(java.util.Set) READ_ALL_COLUMNS(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS) ZoneId(java.time.ZoneId) ARRAY(com.facebook.presto.common.type.StandardTypes.ARRAY) UncheckedIOException(java.io.UncheckedIOException) LzoCodec(com.hadoop.compression.lzo.LzoCodec) BooleanWritable(org.apache.hadoop.io.BooleanWritable) SqlTimestamp(com.facebook.presto.common.type.SqlTimestamp) Decimals.rescale(com.facebook.presto.common.type.Decimals.rescale) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) RecordReader(org.apache.hadoop.mapred.RecordReader) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Lz4Codec(org.apache.hadoop.io.compress.Lz4Codec) Iterables(com.google.common.collect.Iterables) StandardTypes(com.facebook.presto.common.type.StandardTypes) RcFileDecoderUtils.findFirstSyncPosition(com.facebook.presto.rcfile.RcFileDecoderUtils.findFirstSyncPosition) DecimalType(com.facebook.presto.common.type.DecimalType) Slice(io.airlift.slice.Slice) TypeSignatureParameter(com.facebook.presto.common.type.TypeSignatureParameter) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) StructObject(org.apache.hadoop.hive.serde2.StructObject) Functions.constant(com.google.common.base.Functions.constant) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) PRESTO_RCFILE_WRITER_VERSION(com.facebook.presto.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION) FunctionAndTypeManager.createTestFunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) SqlDate(com.facebook.presto.common.type.SqlDate) Lists(com.google.common.collect.Lists) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) ArrayType(com.facebook.presto.common.type.ArrayType) LinkedHashSet(java.util.LinkedHashSet) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) Properties(java.util.Properties) AbstractIterator(com.google.common.collect.AbstractIterator) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) UTC_KEY(com.facebook.presto.common.type.TimeZoneKey.UTC_KEY) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) File(java.io.File) NULL(org.apache.hadoop.mapred.Reporter.NULL) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) SIZE_OF_LONG(io.airlift.slice.SizeOf.SIZE_OF_LONG) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) FloatWritable(org.apache.hadoop.io.FloatWritable) RowType(com.facebook.presto.common.type.RowType) FunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) Iterables.transform(com.google.common.collect.Iterables.transform) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) BZIP2(com.facebook.presto.rcfile.RcFileTester.Compression.BZIP2) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) LongWritable(org.apache.hadoop.io.LongWritable) MAP(com.facebook.presto.common.type.StandardTypes.MAP) SESSION(com.facebook.presto.testing.TestingConnectorSession.SESSION) InputFormat(org.apache.hadoop.mapred.InputFormat) Path(org.apache.hadoop.fs.Path) KILOBYTE(io.airlift.units.DataSize.Unit.KILOBYTE) SqlDecimal(com.facebook.presto.common.type.SqlDecimal) NONE(com.facebook.presto.rcfile.RcFileTester.Compression.NONE) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) SIZE_OF_INT(io.airlift.slice.SizeOf.SIZE_OF_INT) ImmutableSet(com.google.common.collect.ImmutableSet) Files.createTempDir(com.google.common.io.Files.createTempDir) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Timestamp(java.sql.Timestamp) RCFileOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat) VarcharType(com.facebook.presto.common.type.VarcharType) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) Objects(java.util.Objects) DataSize(io.airlift.units.DataSize) List(java.util.List) LocalDate(java.time.LocalDate) Entry(java.util.Map.Entry) Optional(java.util.Optional) READ_COLUMN_IDS_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR) LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) MapType(com.facebook.presto.common.type.MapType) Assert.assertNull(org.testng.Assert.assertNull) LazyBinaryMap(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) Assert.assertEquals(org.testng.Assert.assertEquals) HashMap(java.util.HashMap) DoubleWritable(org.apache.hadoop.io.DoubleWritable) PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY(com.facebook.presto.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) OutputStreamSliceOutput(io.airlift.slice.OutputStreamSliceOutput) COMPRESS_CODEC(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS_CODEC) ImmutableList(com.google.common.collect.ImmutableList) ByteWritable(org.apache.hadoop.io.ByteWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) Math.toIntExact(java.lang.Math.toIntExact) SNAPPY(com.facebook.presto.rcfile.RcFileTester.Compression.SNAPPY) Type(com.facebook.presto.common.type.Type) Iterator(java.util.Iterator) Iterators.advance(com.google.common.collect.Iterators.advance) HadoopNative(com.facebook.presto.hadoop.HadoopNative) FileInputStream(java.io.FileInputStream) Decimals(com.facebook.presto.common.type.Decimals) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) Date(java.sql.Date) TextRcFileEncoding(com.facebook.presto.rcfile.text.TextRcFileEncoding) JobConf(org.apache.hadoop.mapred.JobConf) BZip2Codec(org.apache.hadoop.io.compress.BZip2Codec) Collectors.toList(java.util.stream.Collectors.toList) ObjectInspectorFactory(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory) Serializer(org.apache.hadoop.hive.serde2.Serializer) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) Closeable(java.io.Closeable) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) Block(com.facebook.presto.common.block.Block) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) Collections(java.util.Collections) BYTE(io.airlift.units.DataSize.Unit.BYTE) InputStream(java.io.InputStream) StructObject(org.apache.hadoop.hive.serde2.StructObject)

Example 2 with MAP

use of com.facebook.presto.common.type.StandardTypes.MAP in project presto by prestodb.

the class QueryRewriter method applyPropertyOverride.

private static List<Property> applyPropertyOverride(List<Property> properties, List<Property> overrides) {
    Map<String, Expression> propertyMap = properties.stream().collect(toImmutableMap(property -> property.getName().getValue().toLowerCase(ENGLISH), Property::getValue));
    Map<String, Expression> overrideMap = overrides.stream().collect(toImmutableMap(property -> property.getName().getValue().toLowerCase(ENGLISH), Property::getValue));
    return Stream.concat(propertyMap.entrySet().stream(), overrideMap.entrySet().stream()).collect(Collectors.toMap(Entry::getKey, Entry::getValue, (original, override) -> override)).entrySet().stream().map(entry -> new Property(new Identifier(entry.getKey()), entry.getValue())).collect(toImmutableList());
}
Also used : INCLUDING(com.facebook.presto.sql.tree.LikeClause.PropertiesOption.INCLUDING) TypeSignature(com.facebook.presto.common.type.TypeSignature) QueryObjectBundle(com.facebook.presto.verifier.framework.QueryObjectBundle) MAP(com.facebook.presto.common.type.StandardTypes.MAP) SelectItem(com.facebook.presto.sql.tree.SelectItem) Map(java.util.Map) DropView(com.facebook.presto.sql.tree.DropView) CreateTable(com.facebook.presto.sql.tree.CreateTable) ENGLISH(java.util.Locale.ENGLISH) TIME(com.facebook.presto.common.type.TimeType.TIME) LikeClause(com.facebook.presto.sql.tree.LikeClause) Query(com.facebook.presto.sql.tree.Query) QuerySpecification(com.facebook.presto.sql.tree.QuerySpecification) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ClusterType(com.facebook.presto.verifier.framework.ClusterType) Set(java.util.Set) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) SqlParser(com.facebook.presto.sql.parser.SqlParser) Preconditions.checkState(com.google.common.base.Preconditions.checkState) List(java.util.List) UNKNOWN(com.facebook.presto.common.type.UnknownType.UNKNOWN) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) DropTable(com.facebook.presto.sql.tree.DropTable) Entry(java.util.Map.Entry) Optional(java.util.Optional) VIEW(com.facebook.presto.sql.tree.ShowCreate.Type.VIEW) Select(com.facebook.presto.sql.tree.Select) ResultSetMetaData(java.sql.ResultSetMetaData) CreateView(com.facebook.presto.sql.tree.CreateView) QualifiedName(com.facebook.presto.sql.tree.QualifiedName) MapType(com.facebook.presto.common.type.MapType) DecimalType(com.facebook.presto.common.type.DecimalType) TypeSignatureParameter(com.facebook.presto.common.type.TypeSignatureParameter) Field(com.facebook.presto.common.type.RowType.Field) TIMESTAMP_WITH_TIME_ZONE(com.facebook.presto.common.type.TimestampWithTimeZoneType.TIMESTAMP_WITH_TIME_ZONE) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) SHOW_CREATE_VIEW_CONVERTER(com.facebook.presto.verifier.framework.CreateViewVerification.SHOW_CREATE_VIEW_CONVERTER) VerifierUtil.getColumnTypes(com.facebook.presto.verifier.framework.VerifierUtil.getColumnTypes) DATE(com.facebook.presto.common.type.DateType.DATE) SingleColumn(com.facebook.presto.sql.tree.SingleColumn) ArrayList(java.util.ArrayList) Identifier(com.facebook.presto.sql.tree.Identifier) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) REWRITE(com.facebook.presto.verifier.framework.QueryStage.REWRITE) ArrayType(com.facebook.presto.common.type.ArrayType) AllColumns(com.facebook.presto.sql.tree.AllColumns) Cast(com.facebook.presto.sql.tree.Cast) Type(com.facebook.presto.common.type.Type) CreateTableAsSelect(com.facebook.presto.sql.tree.CreateTableAsSelect) Property(com.facebook.presto.sql.tree.Property) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) Language(org.intellij.lang.annotations.Language) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) ResultSetConverter(com.facebook.presto.verifier.prestoaction.PrestoAction.ResultSetConverter) Insert(com.facebook.presto.sql.tree.Insert) QueryException(com.facebook.presto.verifier.framework.QueryException) UUID.randomUUID(java.util.UUID.randomUUID) Expression(com.facebook.presto.sql.tree.Expression) ShowCreate(com.facebook.presto.sql.tree.ShowCreate) PrestoAction(com.facebook.presto.verifier.prestoaction.PrestoAction) VerifierUtil.getColumnNames(com.facebook.presto.verifier.framework.VerifierUtil.getColumnNames) PARSING_OPTIONS(com.facebook.presto.verifier.framework.VerifierUtil.PARSING_OPTIONS) RowType(com.facebook.presto.common.type.RowType) Statement(com.facebook.presto.sql.tree.Statement) Identifier(com.facebook.presto.sql.tree.Identifier) Expression(com.facebook.presto.sql.tree.Expression) Property(com.facebook.presto.sql.tree.Property)

Example 3 with MAP

use of com.facebook.presto.common.type.StandardTypes.MAP in project presto by prestodb.

the class ParquetPageSourceFactory method checkSchemaMatch.

public static boolean checkSchemaMatch(org.apache.parquet.schema.Type parquetType, Type type) {
    String prestoType = type.getTypeSignature().getBase();
    if (parquetType instanceof GroupType) {
        GroupType groupType = parquetType.asGroupType();
        switch(prestoType) {
            case ROW:
                RowType rowType = (RowType) type;
                Map<String, Type> prestoFieldMap = rowType.getFields().stream().collect(Collectors.toMap(field -> field.getName().get().toLowerCase(Locale.ENGLISH), field -> field.getType()));
                for (int i = 0; i < groupType.getFields().size(); i++) {
                    org.apache.parquet.schema.Type parquetFieldType = groupType.getFields().get(i);
                    String fieldName = parquetFieldType.getName().toLowerCase(Locale.ENGLISH);
                    Type prestoFieldType = prestoFieldMap.get(fieldName);
                    if (prestoFieldType != null && !checkSchemaMatch(parquetFieldType, prestoFieldType)) {
                        return false;
                    }
                }
                return true;
            case MAP:
                if (groupType.getFields().size() != 1) {
                    return false;
                }
                org.apache.parquet.schema.Type mapKeyType = groupType.getFields().get(0);
                if (mapKeyType instanceof GroupType) {
                    GroupType mapGroupType = mapKeyType.asGroupType();
                    return mapGroupType.getFields().size() == 2 && checkSchemaMatch(mapGroupType.getFields().get(0), type.getTypeParameters().get(0)) && checkSchemaMatch(mapGroupType.getFields().get(1), type.getTypeParameters().get(1));
                }
                return false;
            case ARRAY:
                /* array has a standard 3-level structure with middle level repeated group with a single field:
                     *  optional group my_list (LIST) {
                     *     repeated group element {
                     *        required type field;
                     *     };
                     *  }
                     *  Backward-compatibility support for 2-level arrays:
                     *   optional group my_list (LIST) {
                     *      repeated type field;
                     *   }
                     *  field itself could be primitive or group
                     */
                if (groupType.getFields().size() != 1) {
                    return false;
                }
                org.apache.parquet.schema.Type bagType = groupType.getFields().get(0);
                if (bagType.isPrimitive()) {
                    return checkSchemaMatch(bagType.asPrimitiveType(), type.getTypeParameters().get(0));
                }
                GroupType bagGroupType = bagType.asGroupType();
                return checkSchemaMatch(bagGroupType, type.getTypeParameters().get(0)) || (bagGroupType.getFields().size() == 1 && checkSchemaMatch(bagGroupType.getFields().get(0), type.getTypeParameters().get(0)));
            default:
                return false;
        }
    }
    checkArgument(parquetType.isPrimitive(), "Unexpected parquet type for column: %s " + parquetType.getName());
    PrimitiveTypeName parquetTypeName = parquetType.asPrimitiveType().getPrimitiveTypeName();
    switch(parquetTypeName) {
        case INT64:
            return prestoType.equals(BIGINT) || prestoType.equals(DECIMAL) || prestoType.equals(TIMESTAMP);
        case INT32:
            return prestoType.equals(INTEGER) || prestoType.equals(BIGINT) || prestoType.equals(SMALLINT) || prestoType.equals(DATE) || prestoType.equals(DECIMAL) || prestoType.equals(TINYINT);
        case BOOLEAN:
            return prestoType.equals(StandardTypes.BOOLEAN);
        case FLOAT:
            return prestoType.equals(REAL);
        case DOUBLE:
            return prestoType.equals(StandardTypes.DOUBLE);
        case BINARY:
            return prestoType.equals(VARBINARY) || prestoType.equals(VARCHAR) || prestoType.startsWith(CHAR) || prestoType.equals(DECIMAL);
        case INT96:
            return prestoType.equals(TIMESTAMP);
        case FIXED_LEN_BYTE_ARRAY:
            return prestoType.equals(DECIMAL);
        default:
            throw new IllegalArgumentException("Unexpected parquet type name: " + parquetTypeName);
    }
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) DateTimeZone(org.joda.time.DateTimeZone) TINYINT(com.facebook.presto.common.type.StandardTypes.TINYINT) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) HiveSessionProperties.isUseParquetColumnNames(com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames) ROW(com.facebook.presto.common.type.StandardTypes.ROW) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) AGGREGATED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ParquetDataSource(com.facebook.presto.parquet.ParquetDataSource) ParquetMetadataSource(com.facebook.presto.parquet.cache.ParquetMetadataSource) Set(java.util.Set) HiveFileContext(com.facebook.presto.hive.HiveFileContext) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) ARRAY(com.facebook.presto.common.type.StandardTypes.ARRAY) ConnectorSession(com.facebook.presto.spi.ConnectorSession) CHAR(com.facebook.presto.common.type.StandardTypes.CHAR) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ParquetTypeUtils.columnPathFromSubfield(com.facebook.presto.parquet.ParquetTypeUtils.columnPathFromSubfield) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) HiveBatchPageSourceFactory(com.facebook.presto.hive.HiveBatchPageSourceFactory) ColumnIO(org.apache.parquet.io.ColumnIO) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) StandardTypes(com.facebook.presto.common.type.StandardTypes) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) ArrayList(java.util.ArrayList) PrimitiveTypeName(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName) HIVE_BAD_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA) DATE(com.facebook.presto.common.type.StandardTypes.DATE) IOException(java.io.IOException) ParquetTypeUtils.nestedColumnPath(com.facebook.presto.parquet.ParquetTypeUtils.nestedColumnPath) Domain(com.facebook.presto.common.predicate.Domain) INTEGER(com.facebook.presto.common.type.StandardTypes.INTEGER) HiveColumnHandle.getPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.getPushedDownSubfield) ParquetReader(com.facebook.presto.parquet.reader.ParquetReader) HiveSessionProperties.getParquetMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getParquetMaxReadBlockSize) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) REAL(com.facebook.presto.common.type.StandardTypes.REAL) RowType(com.facebook.presto.common.type.RowType) ColumnIOConverter.constructField(org.apache.parquet.io.ColumnIOConverter.constructField) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) HiveColumnHandle.isPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.isPushedDownSubfield) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) ParquetTypeUtils.lookupColumnByName(com.facebook.presto.parquet.ParquetTypeUtils.lookupColumnByName) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) MAP(com.facebook.presto.common.type.StandardTypes.MAP) Locale(java.util.Locale) ParquetTypeUtils.getDescriptors(com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) BIGINT(com.facebook.presto.common.type.StandardTypes.BIGINT) HiveSessionProperties.isParquetBatchReadsEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReadsEnabled) ImmutableSet(com.google.common.collect.ImmutableSet) GroupType(org.apache.parquet.schema.GroupType) ImmutableMap(com.google.common.collect.ImmutableMap) Collectors(java.util.stream.Collectors) ColumnIOConverter.findNestedColumnIO(org.apache.parquet.io.ColumnIOConverter.findNestedColumnIO) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) ColumnIndexFilterUtils(com.facebook.presto.parquet.reader.ColumnIndexFilterUtils) VARCHAR(com.facebook.presto.common.type.StandardTypes.VARCHAR) MessageType(org.apache.parquet.schema.MessageType) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveSessionProperties.isParquetBatchReaderVerificationEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReaderVerificationEnabled) Entry(java.util.Map.Entry) Optional(java.util.Optional) TIMESTAMP(com.facebook.presto.common.type.StandardTypes.TIMESTAMP) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) SMALLINT(com.facebook.presto.common.type.StandardTypes.SMALLINT) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) PredicateUtils.predicateMatches(com.facebook.presto.parquet.predicate.PredicateUtils.predicateMatches) PrestoException(com.facebook.presto.spi.PrestoException) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) ParquetTypeUtils.getSubfieldType(com.facebook.presto.parquet.ParquetTypeUtils.getSubfieldType) Inject(javax.inject.Inject) ParquetTypeUtils.getParquetTypeByName(com.facebook.presto.parquet.ParquetTypeUtils.getParquetTypeByName) DECIMAL(com.facebook.presto.common.type.StandardTypes.DECIMAL) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicate(com.facebook.presto.parquet.predicate.Predicate) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) PredicateUtils.buildPredicate(com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate) Type(com.facebook.presto.common.type.Type) VARBINARY(com.facebook.presto.common.type.StandardTypes.VARBINARY) Storage(com.facebook.presto.hive.metastore.Storage) ParquetTypeUtils.getColumnIO(com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) AggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext) PERMISSION_DENIED(com.facebook.presto.spi.StandardErrorCode.PERMISSION_DENIED) Field(com.facebook.presto.parquet.Field) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) AccessControlException(org.apache.hadoop.security.AccessControlException) SYNTHESIZED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.SYNTHESIZED) HiveSessionProperties.columnIndexFilterEnabled(com.facebook.presto.hive.HiveSessionProperties.columnIndexFilterEnabled) RowType(com.facebook.presto.common.type.RowType) GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) ParquetTypeUtils.getSubfieldType(com.facebook.presto.parquet.ParquetTypeUtils.getSubfieldType) Type(com.facebook.presto.common.type.Type) GroupType(org.apache.parquet.schema.GroupType) RowType(com.facebook.presto.common.type.RowType) PrimitiveTypeName(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName)

Example 4 with MAP

use of com.facebook.presto.common.type.StandardTypes.MAP in project presto by prestodb.

the class ParquetPageSourceFactory method createParquetPageSource.

public static ConnectorPageSource createParquetPageSource(HdfsEnvironment hdfsEnvironment, String user, Configuration configuration, Path path, long start, long length, long fileSize, List<HiveColumnHandle> columns, SchemaTableName tableName, boolean useParquetColumnNames, DataSize maxReadBlockSize, boolean batchReaderEnabled, boolean verificationEnabled, TypeManager typeManager, StandardFunctionResolution functionResolution, TupleDomain<HiveColumnHandle> effectivePredicate, FileFormatDataSourceStats stats, HiveFileContext hiveFileContext, ParquetMetadataSource parquetMetadataSource, boolean columnIndexFilterEnabled) {
    AggregatedMemoryContext systemMemoryContext = newSimpleAggregatedMemoryContext();
    ParquetDataSource dataSource = null;
    try {
        FSDataInputStream inputStream = hdfsEnvironment.getFileSystem(user, path, configuration).openFile(path, hiveFileContext);
        dataSource = buildHdfsParquetDataSource(inputStream, path, stats);
        ParquetMetadata parquetMetadata = parquetMetadataSource.getParquetMetadata(dataSource, fileSize, hiveFileContext.isCacheable()).getParquetMetadata();
        if (!columns.isEmpty() && columns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
            return new AggregatedParquetPageSource(columns, parquetMetadata, typeManager, functionResolution);
        }
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        Optional<MessageType> message = columns.stream().filter(column -> column.getColumnType() == REGULAR || isPushedDownSubfield(column)).map(column -> getColumnType(typeManager.getType(column.getTypeSignature()), fileSchema, useParquetColumnNames, column, tableName, path)).filter(Optional::isPresent).map(Optional::get).map(type -> new MessageType(fileSchema.getName(), type)).reduce(MessageType::union);
        MessageType requestedSchema = message.orElse(new MessageType(fileSchema.getName(), ImmutableList.of()));
        ImmutableList.Builder<BlockMetaData> footerBlocks = ImmutableList.builder();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                footerBlocks.add(block);
            }
        }
        Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema);
        TupleDomain<ColumnDescriptor> parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate);
        Predicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath);
        final ParquetDataSource finalDataSource = dataSource;
        ImmutableList.Builder<BlockMetaData> blocks = ImmutableList.builder();
        List<ColumnIndexStore> blockIndexStores = new ArrayList<>();
        for (BlockMetaData block : footerBlocks.build()) {
            Optional<ColumnIndexStore> columnIndexStore = ColumnIndexFilterUtils.getColumnIndexStore(parquetPredicate, finalDataSource, block, descriptorsByPath, columnIndexFilterEnabled);
            if (predicateMatches(parquetPredicate, block, finalDataSource, descriptorsByPath, parquetTupleDomain, columnIndexStore, columnIndexFilterEnabled)) {
                blocks.add(block);
                blockIndexStores.add(columnIndexStore.orElse(null));
                hiveFileContext.incrementCounter("parquet.blocksRead", 1);
                hiveFileContext.incrementCounter("parquet.rowsRead", block.getRowCount());
                hiveFileContext.incrementCounter("parquet.totalBytesRead", block.getTotalByteSize());
            } else {
                hiveFileContext.incrementCounter("parquet.blocksSkipped", 1);
                hiveFileContext.incrementCounter("parquet.rowsSkipped", block.getRowCount());
                hiveFileContext.incrementCounter("parquet.totalBytesSkipped", block.getTotalByteSize());
            }
        }
        MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema);
        ParquetReader parquetReader = new ParquetReader(messageColumnIO, blocks.build(), dataSource, systemMemoryContext, maxReadBlockSize, batchReaderEnabled, verificationEnabled, parquetPredicate, blockIndexStores, columnIndexFilterEnabled);
        ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Type> typesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Optional<Field>> fieldsBuilder = ImmutableList.builder();
        for (HiveColumnHandle column : columns) {
            checkArgument(column.getColumnType() == REGULAR || column.getColumnType() == SYNTHESIZED, "column type must be regular or synthesized column");
            String name = column.getName();
            Type type = typeManager.getType(column.getTypeSignature());
            namesBuilder.add(name);
            typesBuilder.add(type);
            if (column.getColumnType() == SYNTHESIZED) {
                Subfield pushedDownSubfield = getPushedDownSubfield(column);
                List<String> nestedColumnPath = nestedColumnPath(pushedDownSubfield);
                Optional<ColumnIO> columnIO = findNestedColumnIO(lookupColumnByName(messageColumnIO, pushedDownSubfield.getRootName()), nestedColumnPath);
                if (columnIO.isPresent()) {
                    fieldsBuilder.add(constructField(type, columnIO.get()));
                } else {
                    fieldsBuilder.add(Optional.empty());
                }
            } else if (getParquetType(type, fileSchema, useParquetColumnNames, column, tableName, path).isPresent()) {
                String columnName = useParquetColumnNames ? name : fileSchema.getFields().get(column.getHiveColumnIndex()).getName();
                fieldsBuilder.add(constructField(type, lookupColumnByName(messageColumnIO, columnName)));
            } else {
                fieldsBuilder.add(Optional.empty());
            }
        }
        return new ParquetPageSource(parquetReader, typesBuilder.build(), fieldsBuilder.build(), namesBuilder.build(), hiveFileContext.getStats());
    } catch (Exception e) {
        try {
            if (dataSource != null) {
                dataSource.close();
            }
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        if (e instanceof ParquetCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, e);
        }
        if (e instanceof AccessControlException) {
            throw new PrestoException(PERMISSION_DENIED, e.getMessage(), e);
        }
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) DateTimeZone(org.joda.time.DateTimeZone) TINYINT(com.facebook.presto.common.type.StandardTypes.TINYINT) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) HiveSessionProperties.isUseParquetColumnNames(com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames) ROW(com.facebook.presto.common.type.StandardTypes.ROW) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) AGGREGATED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ParquetDataSource(com.facebook.presto.parquet.ParquetDataSource) ParquetMetadataSource(com.facebook.presto.parquet.cache.ParquetMetadataSource) Set(java.util.Set) HiveFileContext(com.facebook.presto.hive.HiveFileContext) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) ARRAY(com.facebook.presto.common.type.StandardTypes.ARRAY) ConnectorSession(com.facebook.presto.spi.ConnectorSession) CHAR(com.facebook.presto.common.type.StandardTypes.CHAR) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ParquetTypeUtils.columnPathFromSubfield(com.facebook.presto.parquet.ParquetTypeUtils.columnPathFromSubfield) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) HiveBatchPageSourceFactory(com.facebook.presto.hive.HiveBatchPageSourceFactory) ColumnIO(org.apache.parquet.io.ColumnIO) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) StandardTypes(com.facebook.presto.common.type.StandardTypes) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) ArrayList(java.util.ArrayList) PrimitiveTypeName(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName) HIVE_BAD_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA) DATE(com.facebook.presto.common.type.StandardTypes.DATE) IOException(java.io.IOException) ParquetTypeUtils.nestedColumnPath(com.facebook.presto.parquet.ParquetTypeUtils.nestedColumnPath) Domain(com.facebook.presto.common.predicate.Domain) INTEGER(com.facebook.presto.common.type.StandardTypes.INTEGER) HiveColumnHandle.getPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.getPushedDownSubfield) ParquetReader(com.facebook.presto.parquet.reader.ParquetReader) HiveSessionProperties.getParquetMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getParquetMaxReadBlockSize) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) REAL(com.facebook.presto.common.type.StandardTypes.REAL) RowType(com.facebook.presto.common.type.RowType) ColumnIOConverter.constructField(org.apache.parquet.io.ColumnIOConverter.constructField) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) HiveColumnHandle.isPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.isPushedDownSubfield) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) ParquetTypeUtils.lookupColumnByName(com.facebook.presto.parquet.ParquetTypeUtils.lookupColumnByName) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) MAP(com.facebook.presto.common.type.StandardTypes.MAP) Locale(java.util.Locale) ParquetTypeUtils.getDescriptors(com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) BIGINT(com.facebook.presto.common.type.StandardTypes.BIGINT) HiveSessionProperties.isParquetBatchReadsEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReadsEnabled) ImmutableSet(com.google.common.collect.ImmutableSet) GroupType(org.apache.parquet.schema.GroupType) ImmutableMap(com.google.common.collect.ImmutableMap) Collectors(java.util.stream.Collectors) ColumnIOConverter.findNestedColumnIO(org.apache.parquet.io.ColumnIOConverter.findNestedColumnIO) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) ColumnIndexFilterUtils(com.facebook.presto.parquet.reader.ColumnIndexFilterUtils) VARCHAR(com.facebook.presto.common.type.StandardTypes.VARCHAR) MessageType(org.apache.parquet.schema.MessageType) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveSessionProperties.isParquetBatchReaderVerificationEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReaderVerificationEnabled) Entry(java.util.Map.Entry) Optional(java.util.Optional) TIMESTAMP(com.facebook.presto.common.type.StandardTypes.TIMESTAMP) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) SMALLINT(com.facebook.presto.common.type.StandardTypes.SMALLINT) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) PredicateUtils.predicateMatches(com.facebook.presto.parquet.predicate.PredicateUtils.predicateMatches) PrestoException(com.facebook.presto.spi.PrestoException) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) ParquetTypeUtils.getSubfieldType(com.facebook.presto.parquet.ParquetTypeUtils.getSubfieldType) Inject(javax.inject.Inject) ParquetTypeUtils.getParquetTypeByName(com.facebook.presto.parquet.ParquetTypeUtils.getParquetTypeByName) DECIMAL(com.facebook.presto.common.type.StandardTypes.DECIMAL) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicate(com.facebook.presto.parquet.predicate.Predicate) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) PredicateUtils.buildPredicate(com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate) Type(com.facebook.presto.common.type.Type) VARBINARY(com.facebook.presto.common.type.StandardTypes.VARBINARY) Storage(com.facebook.presto.hive.metastore.Storage) ParquetTypeUtils.getColumnIO(com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) AggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext) PERMISSION_DENIED(com.facebook.presto.spi.StandardErrorCode.PERMISSION_DENIED) Field(com.facebook.presto.parquet.Field) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) AccessControlException(org.apache.hadoop.security.AccessControlException) SYNTHESIZED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.SYNTHESIZED) HiveSessionProperties.columnIndexFilterEnabled(com.facebook.presto.hive.HiveSessionProperties.columnIndexFilterEnabled) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) ImmutableList(com.google.common.collect.ImmutableList) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) Predicate(com.facebook.presto.parquet.predicate.Predicate) PredicateUtils.buildPredicate(com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) MessageType(org.apache.parquet.schema.MessageType) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) ParquetTypeUtils.columnPathFromSubfield(com.facebook.presto.parquet.ParquetTypeUtils.columnPathFromSubfield) HiveColumnHandle.getPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.getPushedDownSubfield) HiveColumnHandle.isPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.isPushedDownSubfield) Subfield(com.facebook.presto.common.Subfield) ParquetDataSource(com.facebook.presto.parquet.ParquetDataSource) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) Optional(java.util.Optional) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ParquetReader(com.facebook.presto.parquet.reader.ParquetReader) AccessControlException(org.apache.hadoop.security.AccessControlException) IOException(java.io.IOException) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) AggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext) ColumnIO(org.apache.parquet.io.ColumnIO) ColumnIOConverter.findNestedColumnIO(org.apache.parquet.io.ColumnIOConverter.findNestedColumnIO) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) ParquetTypeUtils.getColumnIO(com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) AccessControlException(org.apache.hadoop.security.AccessControlException) RowType(com.facebook.presto.common.type.RowType) GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) ParquetTypeUtils.getSubfieldType(com.facebook.presto.parquet.ParquetTypeUtils.getSubfieldType) Type(com.facebook.presto.common.type.Type) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream)

Example 5 with MAP

use of com.facebook.presto.common.type.StandardTypes.MAP in project presto by prestodb.

the class RcFileTester method preprocessWriteValueOld.

private static Object preprocessWriteValueOld(Type type, Object value) {
    if (value == null) {
        return null;
    }
    if (type.equals(BOOLEAN)) {
        return value;
    } else if (type.equals(TINYINT)) {
        return ((Number) value).byteValue();
    } else if (type.equals(SMALLINT)) {
        return ((Number) value).shortValue();
    } else if (type.equals(INTEGER)) {
        return ((Number) value).intValue();
    } else if (type.equals(BIGINT)) {
        return ((Number) value).longValue();
    } else if (type.equals(REAL)) {
        return ((Number) value).floatValue();
    } else if (type.equals(DOUBLE)) {
        return ((Number) value).doubleValue();
    } else if (type instanceof VarcharType) {
        return value;
    } else if (type.equals(VARBINARY)) {
        return ((SqlVarbinary) value).getBytes();
    } else if (type.equals(DATE)) {
        int days = ((SqlDate) value).getDays();
        LocalDate localDate = LocalDate.ofEpochDay(days);
        ZonedDateTime zonedDateTime = localDate.atStartOfDay(ZoneId.systemDefault());
        long millis = zonedDateTime.toEpochSecond() * 1000;
        Date date = new Date(0);
        // mills must be set separately to avoid masking
        date.setTime(millis);
        return date;
    } else if (type.equals(TIMESTAMP)) {
        long millisUtc = (int) ((SqlTimestamp) value).getMillisUtc();
        return new Timestamp(millisUtc);
    } else if (type instanceof DecimalType) {
        return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());
    } else if (type.getTypeSignature().getBase().equals(ARRAY)) {
        Type elementType = type.getTypeParameters().get(0);
        return ((List<?>) value).stream().map(element -> preprocessWriteValueOld(elementType, element)).collect(toList());
    } else if (type.getTypeSignature().getBase().equals(MAP)) {
        Type keyType = type.getTypeParameters().get(0);
        Type valueType = type.getTypeParameters().get(1);
        Map<Object, Object> newMap = new HashMap<>();
        for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
            newMap.put(preprocessWriteValueOld(keyType, entry.getKey()), preprocessWriteValueOld(valueType, entry.getValue()));
        }
        return newMap;
    } else if (type.getTypeSignature().getBase().equals(ROW)) {
        List<?> fieldValues = (List<?>) value;
        List<Type> fieldTypes = type.getTypeParameters();
        List<Object> newStruct = new ArrayList<>();
        for (int fieldId = 0; fieldId < fieldValues.size(); fieldId++) {
            newStruct.add(preprocessWriteValueOld(fieldTypes.get(fieldId), fieldValues.get(fieldId)));
        }
        return newStruct;
    }
    throw new IllegalArgumentException("unsupported type: " + type);
}
Also used : SnappyCodec(org.apache.hadoop.io.compress.SnappyCodec) Page(com.facebook.presto.common.Page) DateTimeZone(org.joda.time.DateTimeZone) LZ4(com.facebook.presto.rcfile.RcFileTester.Compression.LZ4) ZLIB(com.facebook.presto.rcfile.RcFileTester.Compression.ZLIB) ZonedDateTime(java.time.ZonedDateTime) PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) Text(org.apache.hadoop.io.Text) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) Writable(org.apache.hadoop.io.Writable) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) ROW(com.facebook.presto.common.type.StandardTypes.ROW) PrimitiveObjectInspectorFactory.javaDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDateObjectInspector) FileSplit(org.apache.hadoop.mapred.FileSplit) RCFileInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) BigInteger(java.math.BigInteger) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Assert.assertFalse(org.testng.Assert.assertFalse) LazyBinaryArray(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray) IntWritable(org.apache.hadoop.io.IntWritable) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) BlockBuilder(com.facebook.presto.common.block.BlockBuilder) LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) LazyArray(org.apache.hadoop.hive.serde2.lazy.LazyArray) Set(java.util.Set) READ_ALL_COLUMNS(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS) ZoneId(java.time.ZoneId) ARRAY(com.facebook.presto.common.type.StandardTypes.ARRAY) UncheckedIOException(java.io.UncheckedIOException) LzoCodec(com.hadoop.compression.lzo.LzoCodec) BooleanWritable(org.apache.hadoop.io.BooleanWritable) SqlTimestamp(com.facebook.presto.common.type.SqlTimestamp) Decimals.rescale(com.facebook.presto.common.type.Decimals.rescale) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) RecordReader(org.apache.hadoop.mapred.RecordReader) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Lz4Codec(org.apache.hadoop.io.compress.Lz4Codec) Iterables(com.google.common.collect.Iterables) StandardTypes(com.facebook.presto.common.type.StandardTypes) RcFileDecoderUtils.findFirstSyncPosition(com.facebook.presto.rcfile.RcFileDecoderUtils.findFirstSyncPosition) DecimalType(com.facebook.presto.common.type.DecimalType) Slice(io.airlift.slice.Slice) TypeSignatureParameter(com.facebook.presto.common.type.TypeSignatureParameter) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) StructObject(org.apache.hadoop.hive.serde2.StructObject) Functions.constant(com.google.common.base.Functions.constant) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) PRESTO_RCFILE_WRITER_VERSION(com.facebook.presto.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION) FunctionAndTypeManager.createTestFunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) SqlDate(com.facebook.presto.common.type.SqlDate) Lists(com.google.common.collect.Lists) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) ArrayType(com.facebook.presto.common.type.ArrayType) LinkedHashSet(java.util.LinkedHashSet) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) Properties(java.util.Properties) AbstractIterator(com.google.common.collect.AbstractIterator) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) UTC_KEY(com.facebook.presto.common.type.TimeZoneKey.UTC_KEY) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) File(java.io.File) NULL(org.apache.hadoop.mapred.Reporter.NULL) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) SIZE_OF_LONG(io.airlift.slice.SizeOf.SIZE_OF_LONG) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) FloatWritable(org.apache.hadoop.io.FloatWritable) RowType(com.facebook.presto.common.type.RowType) FunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) Iterables.transform(com.google.common.collect.Iterables.transform) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) BZIP2(com.facebook.presto.rcfile.RcFileTester.Compression.BZIP2) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) LongWritable(org.apache.hadoop.io.LongWritable) MAP(com.facebook.presto.common.type.StandardTypes.MAP) SESSION(com.facebook.presto.testing.TestingConnectorSession.SESSION) InputFormat(org.apache.hadoop.mapred.InputFormat) Path(org.apache.hadoop.fs.Path) KILOBYTE(io.airlift.units.DataSize.Unit.KILOBYTE) SqlDecimal(com.facebook.presto.common.type.SqlDecimal) NONE(com.facebook.presto.rcfile.RcFileTester.Compression.NONE) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) SIZE_OF_INT(io.airlift.slice.SizeOf.SIZE_OF_INT) ImmutableSet(com.google.common.collect.ImmutableSet) Files.createTempDir(com.google.common.io.Files.createTempDir) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Timestamp(java.sql.Timestamp) RCFileOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat) VarcharType(com.facebook.presto.common.type.VarcharType) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) Objects(java.util.Objects) DataSize(io.airlift.units.DataSize) List(java.util.List) LocalDate(java.time.LocalDate) Entry(java.util.Map.Entry) Optional(java.util.Optional) READ_COLUMN_IDS_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR) LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) MapType(com.facebook.presto.common.type.MapType) Assert.assertNull(org.testng.Assert.assertNull) LazyBinaryMap(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) Assert.assertEquals(org.testng.Assert.assertEquals) HashMap(java.util.HashMap) DoubleWritable(org.apache.hadoop.io.DoubleWritable) PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY(com.facebook.presto.rcfile.RcFileWriter.PRESTO_RCFILE_WRITER_VERSION_METADATA_KEY) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) OutputStreamSliceOutput(io.airlift.slice.OutputStreamSliceOutput) COMPRESS_CODEC(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS_CODEC) ImmutableList(com.google.common.collect.ImmutableList) ByteWritable(org.apache.hadoop.io.ByteWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) Math.toIntExact(java.lang.Math.toIntExact) SNAPPY(com.facebook.presto.rcfile.RcFileTester.Compression.SNAPPY) Type(com.facebook.presto.common.type.Type) Iterator(java.util.Iterator) Iterators.advance(com.google.common.collect.Iterators.advance) HadoopNative(com.facebook.presto.hadoop.HadoopNative) FileInputStream(java.io.FileInputStream) Decimals(com.facebook.presto.common.type.Decimals) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) Date(java.sql.Date) TextRcFileEncoding(com.facebook.presto.rcfile.text.TextRcFileEncoding) JobConf(org.apache.hadoop.mapred.JobConf) BZip2Codec(org.apache.hadoop.io.compress.BZip2Codec) Collectors.toList(java.util.stream.Collectors.toList) ObjectInspectorFactory(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory) Serializer(org.apache.hadoop.hive.serde2.Serializer) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) Closeable(java.io.Closeable) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) Block(com.facebook.presto.common.block.Block) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) Collections(java.util.Collections) BYTE(io.airlift.units.DataSize.Unit.BYTE) InputStream(java.io.InputStream) VarcharType(com.facebook.presto.common.type.VarcharType) SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) ArrayList(java.util.ArrayList) SqlDecimal(com.facebook.presto.common.type.SqlDecimal) LocalDate(java.time.LocalDate) SqlTimestamp(com.facebook.presto.common.type.SqlTimestamp) Timestamp(java.sql.Timestamp) SqlDate(com.facebook.presto.common.type.SqlDate) LocalDate(java.time.LocalDate) Date(java.sql.Date) DecimalType(com.facebook.presto.common.type.DecimalType) ArrayType(com.facebook.presto.common.type.ArrayType) RowType(com.facebook.presto.common.type.RowType) VarcharType(com.facebook.presto.common.type.VarcharType) MapType(com.facebook.presto.common.type.MapType) Type(com.facebook.presto.common.type.Type) Entry(java.util.Map.Entry) ZonedDateTime(java.time.ZonedDateTime) DecimalType(com.facebook.presto.common.type.DecimalType) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) StructObject(org.apache.hadoop.hive.serde2.StructObject) Map(java.util.Map) LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap) ImmutableMap(com.google.common.collect.ImmutableMap) LazyBinaryMap(org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap) HashMap(java.util.HashMap)

Aggregations

RowType (com.facebook.presto.common.type.RowType)5 MAP (com.facebook.presto.common.type.StandardTypes.MAP)5 Type (com.facebook.presto.common.type.Type)5 ImmutableList (com.google.common.collect.ImmutableList)5 ImmutableMap (com.google.common.collect.ImmutableMap)5 ArrayList (java.util.ArrayList)5 List (java.util.List)5 Map (java.util.Map)5 Entry (java.util.Map.Entry)5 Optional (java.util.Optional)5 Set (java.util.Set)5 StandardTypes (com.facebook.presto.common.type.StandardTypes)4 ARRAY (com.facebook.presto.common.type.StandardTypes.ARRAY)4 ROW (com.facebook.presto.common.type.StandardTypes.ROW)4 ImmutableSet (com.google.common.collect.ImmutableSet)3 DataSize (io.airlift.units.DataSize)3 IOException (java.io.IOException)3 Configuration (org.apache.hadoop.conf.Configuration)3 Path (org.apache.hadoop.fs.Path)3 DateTimeZone (org.joda.time.DateTimeZone)3