
Example 1 with UnsupportedTypeException

Use of org.greenplum.pxf.api.error.UnsupportedTypeException in project pxf by greenplum-db.

From the class ORCVectorizedResolver, method getFieldsForBatch.

/**
 * Returns the resolved list of lists of OneFields for a given
 * VectorizedRowBatch
 *
 * @param batch unresolved batch
 * @return the resolved batch mapped to the Greenplum type
 */
@Override
public List<List<OneField>> getFieldsForBatch(OneRow batch) {
    ensureFunctionsAreInitialized();
    VectorizedRowBatch vectorizedBatch = (VectorizedRowBatch) batch.getData();
    int batchSize = vectorizedBatch.size;
    // The resolved batch is a list with one entry per row in the batch;
    // each inner list holds one OneField per column
    List<List<OneField>> resolvedBatch = prepareResolvedBatch(batchSize);
    // index to the projected columns
    int columnIndex = 0;
    OneField[] oneFields;
    for (ColumnDescriptor columnDescriptor : columnDescriptors) {
        if (!columnDescriptor.isProjected()) {
            oneFields = ORCVectorizedMappingFunctions.getNullResultSet(columnDescriptor.columnTypeCode(), batchSize);
        } else {
            TypeDescription orcColumn = positionalAccess
                    ? (columnIndex < readSchema.getChildren().size()
                            ? readSchema.getChildren().get(columnIndex)
                            : null)
                    : readFields.get(columnDescriptor.columnName());
            if (orcColumn == null) {
                // this column is missing in the underlying ORC file, but
                // it is defined in the Greenplum table. This can happen
                // when a schema evolves, for example the original
                // ORC-backed table had 4 columns, and at a later point in
                // time a fifth column was added. Files written before the
                // column was added will have 4 columns, and new files
                // will have 5 columns
                oneFields = ORCVectorizedMappingFunctions.getNullResultSet(columnDescriptor.columnTypeCode(), batchSize);
            } else if (orcColumn.getCategory().isPrimitive() || orcColumn.getCategory() == TypeDescription.Category.LIST) {
                oneFields = functions[columnIndex].apply(vectorizedBatch, vectorizedBatch.cols[columnIndex], typeOidMappings[columnIndex]);
                columnIndex++;
            } else {
                throw new UnsupportedTypeException(String.format("Unable to resolve column '%s' with category '%s'. Only primitive and lists of primitive types are supported.", readSchema.getFieldNames().get(columnIndex), orcColumn.getCategory()));
            }
        }
        // oneFields holds one value per row for this column; add each
        // value to the corresponding row's list
        for (int row = 0; row < batchSize; row++) {
            resolvedBatch.get(row).add(oneFields[row]);
        }
    }
    return resolvedBatch;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) ColumnDescriptor(org.greenplum.pxf.api.utilities.ColumnDescriptor) TypeDescription(org.apache.orc.TypeDescription) UnsupportedTypeException(org.greenplum.pxf.api.error.UnsupportedTypeException) ArrayList(java.util.ArrayList) List(java.util.List) OneField(org.greenplum.pxf.api.OneField)
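
The resolved batch is row-major: one inner list per row in the ORC batch, and one OneField per Greenplum column inside each inner list. Below is a minimal sketch (not part of the PXF sources) of how a caller could walk that structure; it assumes OneField exposes public type and val fields as in the PXF API, and the printBatch helper name is illustrative only.

import java.util.List;

import org.greenplum.pxf.api.OneField;

// Illustrative helper: iterates the row-major structure returned by getFieldsForBatch.
public class ResolvedBatchWalker {

    static void printBatch(List<List<OneField>> resolvedBatch) {
        for (List<OneField> row : resolvedBatch) {       // one entry per row in the batch
            StringBuilder line = new StringBuilder();
            for (OneField field : row) {                  // one entry per table column
                line.append(field.type).append('=').append(field.val).append(' ');
            }
            System.out.println(line.toString().trim());
        }
    }
}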

Example 2 with UnsupportedTypeException

Use of org.greenplum.pxf.api.error.UnsupportedTypeException in project pxf by greenplum-db.

From the class HiveClientWrapper, method getSchema.

/**
 * Populates the given metadata object with the given table's fields and partitions.
 * The partition fields are added at the end of the table schema.
 * Throws an exception if the table contains unsupported field types.
 * Supported HCatalog types: TINYINT,
 * SMALLINT, INT, BIGINT, BOOLEAN, FLOAT, DOUBLE, STRING, BINARY, TIMESTAMP,
 * DATE, DECIMAL, VARCHAR, CHAR.
 *
 * @param tbl      Hive table
 * @param metadata schema of given table
 */
public void getSchema(Table tbl, Metadata metadata) {
    int hiveColumnsSize = tbl.getSd().getColsSize();
    int hivePartitionsSize = tbl.getPartitionKeysSize();
    LOG.debug("Hive table: {} fields. {} partitions.", hiveColumnsSize, hivePartitionsSize);
    // check hive fields
    try {
        List<FieldSchema> hiveColumns = tbl.getSd().getCols();
        for (FieldSchema hiveCol : hiveColumns) {
            metadata.addField(hiveUtilities.mapHiveType(hiveCol));
        }
        // check partition fields
        List<FieldSchema> hivePartitions = tbl.getPartitionKeys();
        for (FieldSchema hivePart : hivePartitions) {
            metadata.addField(hiveUtilities.mapHiveType(hivePart));
        }
    } catch (UnsupportedTypeException e) {
        String errorMsg = "Failed to retrieve metadata for table " + metadata.getItem() + ". " + e.getMessage();
        throw new UnsupportedTypeException(errorMsg);
    }
}
Also used : FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) UnsupportedTypeException(org.greenplum.pxf.api.error.UnsupportedTypeException)
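
The catch block above shows a pattern that recurs across PXF: an UnsupportedTypeException thrown deep in type mapping is caught and re-thrown with the table name prepended, so the caller sees which table the failure belongs to. A standalone sketch of the same pattern follows; mapHiveTypeName is a hypothetical stand-in for hiveUtilities.mapHiveType.

import org.greenplum.pxf.api.error.UnsupportedTypeException;

public class SchemaMappingSketch {

    // Hypothetical stand-in for hiveUtilities.mapHiveType: maps a Hive type name or fails.
    static String mapHiveTypeName(String hiveType) {
        if ("int".equals(hiveType)) {
            return "int4";
        }
        throw new UnsupportedTypeException("Unable to map Hive type: " + hiveType);
    }

    // Wrap-and-rethrow with table context, mirroring the catch block in getSchema.
    static String mapForTable(String tableName, String hiveType) {
        try {
            return mapHiveTypeName(hiveType);
        } catch (UnsupportedTypeException e) {
            throw new UnsupportedTypeException(
                    "Failed to retrieve metadata for table " + tableName + ". " + e.getMessage());
        }
    }
}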

Example 3 with UnsupportedTypeException

Use of org.greenplum.pxf.api.error.UnsupportedTypeException in project pxf by greenplum-db.

From the class HiveColumnarSerdeResolver, method traverseTuple.

/**
 * Handle a Hive record.
 * Supported object categories:
 * Primitive - including NULL
 * Struct (used by ColumnarSerDe to store primitives) - cannot be NULL
 * <p/>
 * Any other category will throw UnsupportedTypeException
 */
private void traverseTuple(Object obj, ObjectInspector objInspector) throws BadRecordException {
    ObjectInspector.Category category = objInspector.getCategory();
    if ((obj == null) && (category != ObjectInspector.Category.PRIMITIVE)) {
        throw new BadRecordException("NULL Hive composite object");
    }
    switch(category) {
        case PRIMITIVE:
            resolvePrimitive(obj, (PrimitiveObjectInspector) objInspector);
            break;
        case STRUCT:
            StructObjectInspector soi = (StructObjectInspector) objInspector;
            List<? extends StructField> fields = soi.getAllStructFieldRefs();
            List<?> list = soi.getStructFieldsDataAsList(obj);
            if (list == null) {
                throw new BadRecordException("Illegal value NULL for Hive data type Struct");
            }
            Map<String, Integer> columnNameToStructIndexMap = IntStream.range(0, fields.size())
                    .boxed()
                    .collect(Collectors.toMap(i -> StringUtils.lowerCase(fields.get(i).getFieldName()), i -> i));
            List<ColumnDescriptor> tupleDescription = context.getTupleDescription();
            for (int j = 0; j < tupleDescription.size(); j++) {
                ColumnDescriptor columnDescriptor = tupleDescription.get(j);
                String lowercaseColumnName = StringUtils.lowerCase(columnDescriptor.columnName());
                Integer i = columnNameToStructIndexMap.get(lowercaseColumnName);
                Integer structIndex = hiveIndexes.get(j);
                HivePartition partition;
                if ((partition = partitionColumnNames.get(lowercaseColumnName)) != null) {
                    // Skip partitioned columns
                    addPartitionColumn(partition.getType(), partition.getValue());
                } else if (!columnDescriptor.isProjected()) {
                    // Non-projected fields will be sent as null values.
                    // This case is invoked only in the top level of fields and
                    // not when interpreting fields of type struct.
                    traverseTuple(null, fields.get(i).getFieldObjectInspector());
                } else if (structIndex < list.size()) {
                    traverseTuple(list.get(structIndex), fields.get(i).getFieldObjectInspector());
                } else {
                    traverseTuple(null, fields.get(i).getFieldObjectInspector());
                }
            }
            break;
        default:
            throw new UnsupportedTypeException("Hive object category: " + objInspector.getCategory() + " unsupported");
    }
}
Also used : ColumnDescriptor(org.greenplum.pxf.api.utilities.ColumnDescriptor) IntStream(java.util.stream.IntStream) StringUtils(org.apache.commons.lang.StringUtils) Writable(org.apache.hadoop.io.Writable) HashMap(java.util.HashMap) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) OneRow(org.greenplum.pxf.api.OneRow) READ_COLUMN_NAMES_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) Map(java.util.Map) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) VARCHAR(org.greenplum.pxf.api.io.DataType.VARCHAR) Utilities(org.greenplum.pxf.api.utilities.Utilities) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) Timestamp(java.sql.Timestamp) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) READ_ALL_COLUMNS(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) OneField(org.greenplum.pxf.api.OneField) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) Collectors(java.util.stream.Collectors) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) Date(java.sql.Date) JobConf(org.apache.hadoop.mapred.JobConf) BadRecordException(org.greenplum.pxf.api.error.BadRecordException) List(java.util.List) UnsupportedTypeException(org.greenplum.pxf.api.error.UnsupportedTypeException) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) READ_COLUMN_IDS_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR) RequestContext(org.greenplum.pxf.api.model.RequestContext) Collections(java.util.Collections) OutputFormat(org.greenplum.pxf.api.model.OutputFormat) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) BadRecordException(org.greenplum.pxf.api.error.BadRecordException) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) 
BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) ColumnDescriptor(org.greenplum.pxf.api.utilities.ColumnDescriptor) UnsupportedTypeException(org.greenplum.pxf.api.error.UnsupportedTypeException) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
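
The STRUCT branch above matches Greenplum column names to Hive struct fields case-insensitively by first building a lowercased field-name-to-index map with IntStream and Collectors.toMap. A self-contained sketch of that idiom, with plain strings standing in for the StructField references:

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class FieldIndexSketch {

    public static void main(String[] args) {
        // Stand-ins for the struct field names reported by the Hive ObjectInspector.
        List<String> fieldNames = List.of("ID", "Name", "CreatedAt");

        // Same idiom as traverseTuple: lowercased field name -> positional index.
        Map<String, Integer> nameToIndex = IntStream.range(0, fieldNames.size())
                .boxed()
                .collect(Collectors.toMap(i -> fieldNames.get(i).toLowerCase(), i -> i));

        System.out.println(nameToIndex.get("name"));      // 1
        System.out.println(nameToIndex.get("createdat")); // 2
    }
}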

Example 4 with UnsupportedTypeException

Use of org.greenplum.pxf.api.error.UnsupportedTypeException in project pxf by greenplum-db.

From the class HiveResolver, method initPartitionFields.

/*
 * The partition fields are initialized once, based on the userData provided
 * by the fragmenter.
 */
void initPartitionFields() {
    partitionColumnNames = new HashMap<>();
    List<HivePartition> hivePartitionList = metadata.getPartitions();
    if (hivePartitionList == null || hivePartitionList.size() == 0) {
        // no partition column information
        return;
    }
    for (HivePartition partition : hivePartitionList) {
        String columnName = partition.getName();
        String type = partition.getType();
        String val = partition.getValue();
        DataType convertedType;
        Object convertedValue;
        boolean isDefaultPartition;
        // check if value is default partition
        isDefaultPartition = isDefaultPartition(type, val);
        // ignore the type's parameters
        String typeName = type.replaceAll("\\(.*\\)", "");
        switch(typeName) {
            case serdeConstants.STRING_TYPE_NAME:
                convertedType = DataType.TEXT;
                convertedValue = isDefaultPartition ? null : val;
                break;
            case serdeConstants.BOOLEAN_TYPE_NAME:
                convertedType = DataType.BOOLEAN;
                convertedValue = isDefaultPartition ? null : Boolean.valueOf(val);
                break;
            case serdeConstants.TINYINT_TYPE_NAME:
            case serdeConstants.SMALLINT_TYPE_NAME:
                convertedType = DataType.SMALLINT;
                convertedValue = isDefaultPartition ? null : Short.parseShort(val);
                break;
            case serdeConstants.INT_TYPE_NAME:
                convertedType = DataType.INTEGER;
                convertedValue = isDefaultPartition ? null : Integer.parseInt(val);
                break;
            case serdeConstants.BIGINT_TYPE_NAME:
                convertedType = DataType.BIGINT;
                convertedValue = isDefaultPartition ? null : Long.parseLong(val);
                break;
            case serdeConstants.FLOAT_TYPE_NAME:
                convertedType = DataType.REAL;
                convertedValue = isDefaultPartition ? null : Float.parseFloat(val);
                break;
            case serdeConstants.DOUBLE_TYPE_NAME:
                convertedType = DataType.FLOAT8;
                convertedValue = isDefaultPartition ? null : Double.parseDouble(val);
                break;
            case serdeConstants.TIMESTAMP_TYPE_NAME:
                convertedType = DataType.TIMESTAMP;
                convertedValue = isDefaultPartition ? null : Timestamp.valueOf(val);
                break;
            case serdeConstants.DATE_TYPE_NAME:
                convertedType = DataType.DATE;
                convertedValue = isDefaultPartition ? null : Date.valueOf(val);
                break;
            case serdeConstants.DECIMAL_TYPE_NAME:
                convertedType = DataType.NUMERIC;
                convertedValue = isDefaultPartition ? null : HiveDecimal.create(val).bigDecimalValue().toString();
                break;
            case serdeConstants.VARCHAR_TYPE_NAME:
                convertedType = DataType.VARCHAR;
                convertedValue = isDefaultPartition ? null : val;
                break;
            case serdeConstants.CHAR_TYPE_NAME:
                convertedType = DataType.BPCHAR;
                convertedValue = isDefaultPartition ? null : val;
                break;
            case serdeConstants.BINARY_TYPE_NAME:
                convertedType = DataType.BYTEA;
                convertedValue = isDefaultPartition ? null : val.getBytes();
                break;
            default:
                throw new UnsupportedTypeException("Unsupported partition type: " + type);
        }
        if (columnDescriptorContainsColumn(columnName)) {
            partitionColumnNames.put(StringUtils.lowerCase(columnName), new OneField(convertedType.getOID(), convertedValue));
        }
    }
    numberOfPartitions = partitionColumnNames.size();
}
Also used : DataType(org.greenplum.pxf.api.io.DataType) UnsupportedTypeException(org.greenplum.pxf.api.error.UnsupportedTypeException) OneField(org.greenplum.pxf.api.OneField)
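
Before the switch, initPartitionFields normalizes the partition type name by stripping any type parameters with a regex, so that names like decimal(38,18) and varchar(255) match the unparameterized serdeConstants names. A small standalone illustration of that normalization (the class name is arbitrary):

public class TypeNameNormalization {

    public static void main(String[] args) {
        for (String type : new String[] { "decimal(38,18)", "varchar(255)", "char(3)", "int" }) {
            // Same normalization as initPartitionFields: drop the "(...)" parameter list.
            String typeName = type.replaceAll("\\(.*\\)", "");
            System.out.println(type + " -> " + typeName);
        }
    }
}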

Example 5 with UnsupportedTypeException

Use of org.greenplum.pxf.api.error.UnsupportedTypeException in project pxf by greenplum-db.

From the class HiveUtilitiesTest, method validateSchema.

@Test
public void validateSchema() {
    String columnName = "abc";
    Integer[] gpdbModifiers = {};
    hiveUtilities.validateTypeCompatible(DataType.SMALLINT, gpdbModifiers, EnumHiveToGpdbType.TinyintType.getTypeName(), columnName);
    hiveUtilities.validateTypeCompatible(DataType.SMALLINT, gpdbModifiers, EnumHiveToGpdbType.SmallintType.getTypeName(), columnName);
    // Both Hive and GPDB types have the same modifiers
    gpdbModifiers = new Integer[] { 38, 18 };
    hiveUtilities.validateTypeCompatible(DataType.NUMERIC, gpdbModifiers, "decimal(38,18)", columnName);
    // GPDB datatype doesn't require modifiers, they are empty, Hive has non-empty modifiers
    // Types are compatible in this case
    gpdbModifiers = new Integer[] {};
    hiveUtilities.validateTypeCompatible(DataType.NUMERIC, gpdbModifiers, "decimal(38,18)", columnName);
    gpdbModifiers = null;
    hiveUtilities.validateTypeCompatible(DataType.NUMERIC, gpdbModifiers, "decimal(38,18)", columnName);
    // GPDB has wider modifiers than Hive, types are compatible
    gpdbModifiers = new Integer[] { 11, 3 };
    hiveUtilities.validateTypeCompatible(DataType.NUMERIC, gpdbModifiers, "decimal(10,2)", columnName);
    // GPDB has lesser modifiers than Hive, types aren't compatible
    gpdbModifiers = new Integer[] { 38, 17 };
    Integer[] finalGpdbModifiers = gpdbModifiers;
    UnsupportedTypeException e = assertThrows(UnsupportedTypeException.class,
            () -> hiveUtilities.validateTypeCompatible(DataType.NUMERIC, finalGpdbModifiers, "decimal(38,18)", columnName),
            "should fail with incompatible modifiers message");
    String errorMsg = "Invalid definition for column " + columnName + ": modifiers are not compatible, " + Arrays.toString(new String[] { "38", "18" }) + ", " + Arrays.toString(new String[] { "38", "17" });
    assertEquals(errorMsg, e.getMessage());
    // Different types, which are not mapped to each other
    Integer[] finalGpdbModifiers1 = new Integer[] {};
    e = assertThrows(UnsupportedTypeException.class, () -> hiveUtilities.validateTypeCompatible(DataType.NUMERIC, finalGpdbModifiers1, "boolean", columnName), "should fail with incompatible types message");
    errorMsg = "Invalid definition for column " + columnName + ": expected GPDB type " + DataType.BOOLEAN + ", actual GPDB type " + DataType.NUMERIC;
    assertEquals(errorMsg, e.getMessage());
}
Also used : UnsupportedTypeException(org.greenplum.pxf.api.error.UnsupportedTypeException) Test(org.junit.jupiter.api.Test)

Aggregations

UnsupportedTypeException (org.greenplum.pxf.api.error.UnsupportedTypeException) 13
OneField (org.greenplum.pxf.api.OneField) 5
ArrayList (java.util.ArrayList) 4
ColumnDescriptor (org.greenplum.pxf.api.utilities.ColumnDescriptor) 4
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) 3
DataType (org.greenplum.pxf.api.io.DataType) 3
Date (java.sql.Date) 2
HashMap (java.util.HashMap) 2
List (java.util.List) 2
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal) 2
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 2
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) 2
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 2
ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) 2
TypeDescription (org.apache.orc.TypeDescription) 2
OneRow (org.greenplum.pxf.api.OneRow) 2
EnumGpdbType (org.greenplum.pxf.api.utilities.EnumGpdbType) 2
Test (org.junit.jupiter.api.Test) 2
BigDecimal (java.math.BigDecimal) 1
Timestamp (java.sql.Timestamp) 1