Use of org.greenplum.pxf.api.error.UnsupportedTypeException in project pxf by greenplum-db.
The class ORCVectorizedResolver, method getFieldsForBatch:
/**
* Returns the resolved list of lists of OneFields given a
* VectorizedRowBatch
*
* @param batch unresolved batch
* @return the resolved batch mapped to the Greenplum type
*/
@Override
public List<List<OneField>> getFieldsForBatch(OneRow batch) {
    ensureFunctionsAreInitialized();
    VectorizedRowBatch vectorizedBatch = (VectorizedRowBatch) batch.getData();
    int batchSize = vectorizedBatch.size;
    // The resolved batch is a list of lists of OneField whose outer size
    // matches the size of the batch. Each inner list holds one OneField
    // per column.
    List<List<OneField>> resolvedBatch = prepareResolvedBatch(batchSize);
    // index into the projected columns
    int columnIndex = 0;
    OneField[] oneFields;
    for (ColumnDescriptor columnDescriptor : columnDescriptors) {
        if (!columnDescriptor.isProjected()) {
            oneFields = ORCVectorizedMappingFunctions.getNullResultSet(columnDescriptor.columnTypeCode(), batchSize);
        } else {
            TypeDescription orcColumn = positionalAccess
                    ? (columnIndex < readSchema.getChildren().size() ? readSchema.getChildren().get(columnIndex) : null)
                    : readFields.get(columnDescriptor.columnName());
            if (orcColumn == null) {
                // This column is missing in the underlying ORC file, but
                // it is defined in the Greenplum table. This can happen
                // when a schema evolves: for example, the original
                // ORC-backed table had 4 columns, and at a later point in
                // time a fifth column was added. Files written before the
                // column was added will have 4 columns, and new files
                // will have 5 columns.
                oneFields = ORCVectorizedMappingFunctions.getNullResultSet(columnDescriptor.columnTypeCode(), batchSize);
            } else if (orcColumn.getCategory().isPrimitive() || orcColumn.getCategory() == TypeDescription.Category.LIST) {
                oneFields = functions[columnIndex].apply(vectorizedBatch, vectorizedBatch.cols[columnIndex], typeOidMappings[columnIndex]);
                columnIndex++;
            } else {
                throw new UnsupportedTypeException(String.format(
                        "Unable to resolve column '%s' with category '%s'. Only primitive and lists of primitive types are supported.",
                        readSchema.getFieldNames().get(columnIndex), orcColumn.getCategory()));
            }
        }
        // oneFields is the array of fields for the column we just processed;
        // add each entry to the list for the corresponding row
        for (int row = 0; row < batchSize; row++) {
            resolvedBatch.get(row).add(oneFields[row]);
        }
    }
    return resolvedBatch;
}
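The heavy lifting above is delegated to the functions[] array populated by ORCVectorizedMappingFunctions. The following is only a minimal sketch, not the project's actual implementation, of what one such mapping function could look like for a BIGINT column; the class and method names (LongColumnMapperSketch, mapLongColumn) are invented for illustration, while LongColumnVector, VectorizedRowBatch and OneField are the real Hive and PXF types used above.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.greenplum.pxf.api.OneField;

// Illustrative sketch only: converts one BIGINT column of a vectorized batch
// into the OneField[] shape that getFieldsForBatch() consumes row by row.
public class LongColumnMapperSketch {

    public static OneField[] mapLongColumn(VectorizedRowBatch batch, LongColumnVector column, int typeOid) {
        OneField[] result = new OneField[batch.size];
        for (int row = 0; row < batch.size; row++) {
            // ORC sets isRepeating when the first value applies to the whole batch
            int valueIndex = column.isRepeating ? 0 : row;
            boolean isNull = !column.noNulls && column.isNull[valueIndex];
            Object value = isNull ? null : column.vector[valueIndex];
            result[row] = new OneField(typeOid, value);
        }
        return result;
    }
}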
Use of org.greenplum.pxf.api.error.UnsupportedTypeException in project pxf by greenplum-db.
The class HiveClientWrapper, method getSchema:
/**
* Populates the given metadata object with the given table's fields and partitions.
* The partition fields are added at the end of the table schema.
* Throws an exception if the table contains unsupported field types.
* Supported HCatalog types: TINYINT,
* SMALLINT, INT, BIGINT, BOOLEAN, FLOAT, DOUBLE, STRING, BINARY, TIMESTAMP,
* DATE, DECIMAL, VARCHAR, CHAR.
*
* @param tbl Hive table
* @param metadata schema of given table
*/
public void getSchema(Table tbl, Metadata metadata) {
    int hiveColumnsSize = tbl.getSd().getColsSize();
    int hivePartitionsSize = tbl.getPartitionKeysSize();
    LOG.debug("Hive table: {} fields. {} partitions.", hiveColumnsSize, hivePartitionsSize);
    // check hive fields
    try {
        List<FieldSchema> hiveColumns = tbl.getSd().getCols();
        for (FieldSchema hiveCol : hiveColumns) {
            metadata.addField(hiveUtilities.mapHiveType(hiveCol));
        }
        // check partition fields
        List<FieldSchema> hivePartitions = tbl.getPartitionKeys();
        for (FieldSchema hivePart : hivePartitions) {
            metadata.addField(hiveUtilities.mapHiveType(hivePart));
        }
    } catch (UnsupportedTypeException e) {
        String errorMsg = "Failed to retrieve metadata for table " + metadata.getItem() + ". " + e.getMessage();
        throw new UnsupportedTypeException(errorMsg);
    }
}
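For context, hiveUtilities.mapHiveType() is where the UnsupportedTypeException originates when a Hive column has a type PXF cannot map. The following is a simplified, hypothetical sketch of that kind of mapping, not the actual HiveUtilities code; it ignores modifiers and complex types, and the class and method names (HiveTypeMapperSketch, toGreenplumType) are invented. It only illustrates how a Hive type name from a FieldSchema could be translated to a Greenplum DataType, with anything else rejected.

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.greenplum.pxf.api.error.UnsupportedTypeException;
import org.greenplum.pxf.api.io.DataType;

// Illustrative sketch only: maps the supported Hive primitive type names
// (see the javadoc above) to Greenplum types and rejects everything else.
public class HiveTypeMapperSketch {

    public static DataType toGreenplumType(FieldSchema hiveColumn) {
        // strip type parameters such as "(38,18)" or "(255)" before matching
        String typeName = hiveColumn.getType().replaceAll("\\(.*\\)", "");
        switch (typeName) {
            case "tinyint":
            case "smallint":  return DataType.SMALLINT;
            case "int":       return DataType.INTEGER;
            case "bigint":    return DataType.BIGINT;
            case "boolean":   return DataType.BOOLEAN;
            case "float":     return DataType.REAL;
            case "double":    return DataType.FLOAT8;
            case "string":    return DataType.TEXT;
            case "binary":    return DataType.BYTEA;
            case "timestamp": return DataType.TIMESTAMP;
            case "date":      return DataType.DATE;
            case "decimal":   return DataType.NUMERIC;
            case "varchar":   return DataType.VARCHAR;
            case "char":      return DataType.BPCHAR;
            default:
                throw new UnsupportedTypeException(String.format(
                        "Hive type %s of column %s is not supported",
                        hiveColumn.getType(), hiveColumn.getName()));
        }
    }
}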
Use of org.greenplum.pxf.api.error.UnsupportedTypeException in project pxf by greenplum-db.
The class HiveColumnarSerdeResolver, method traverseTuple:
/**
* Handle a Hive record.
* Supported object categories:
* Primitive - including NULL
* Struct (used by ColumnarSerDe to store primitives) - cannot be NULL
* <p/>
* Any other category will throw UnsupportedTypeException
*/
private void traverseTuple(Object obj, ObjectInspector objInspector) throws BadRecordException {
    ObjectInspector.Category category = objInspector.getCategory();
    if ((obj == null) && (category != ObjectInspector.Category.PRIMITIVE)) {
        throw new BadRecordException("NULL Hive composite object");
    }
    switch (category) {
        case PRIMITIVE:
            resolvePrimitive(obj, (PrimitiveObjectInspector) objInspector);
            break;
        case STRUCT:
            StructObjectInspector soi = (StructObjectInspector) objInspector;
            List<? extends StructField> fields = soi.getAllStructFieldRefs();
            List<?> list = soi.getStructFieldsDataAsList(obj);
            if (list == null) {
                throw new BadRecordException("Illegal value NULL for Hive data type Struct");
            }
            // map lowercased struct field names to their indexes
            Map<String, Integer> columnNameToStructIndexMap = IntStream.range(0, fields.size())
                    .boxed()
                    .collect(Collectors.toMap(i -> StringUtils.lowerCase(fields.get(i).getFieldName()), i -> i));
            List<ColumnDescriptor> tupleDescription = context.getTupleDescription();
            for (int j = 0; j < tupleDescription.size(); j++) {
                ColumnDescriptor columnDescriptor = tupleDescription.get(j);
                String lowercaseColumnName = StringUtils.lowerCase(columnDescriptor.columnName());
                Integer i = columnNameToStructIndexMap.get(lowercaseColumnName);
                Integer structIndex = hiveIndexes.get(j);
                HivePartition partition;
                if ((partition = partitionColumnNames.get(lowercaseColumnName)) != null) {
                    // Partition column: its value comes from the partition
                    // metadata rather than from the struct data
                    addPartitionColumn(partition.getType(), partition.getValue());
                } else if (!columnDescriptor.isProjected()) {
                    // Non-projected fields will be sent as null values.
                    // This case is invoked only at the top level of fields and
                    // not when interpreting fields of type struct.
                    traverseTuple(null, fields.get(i).getFieldObjectInspector());
                } else if (structIndex < list.size()) {
                    traverseTuple(list.get(structIndex), fields.get(i).getFieldObjectInspector());
                } else {
                    traverseTuple(null, fields.get(i).getFieldObjectInspector());
                }
            }
            break;
        default:
            throw new UnsupportedTypeException("Hive object category: " + objInspector.getCategory() + " unsupported");
    }
}
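resolvePrimitive() itself is not shown here; the sketch below is a hypothetical stand-in (the real method in HiveColumnarSerdeResolver serializes values into the output buffer rather than returning them) that only illustrates where an UnsupportedTypeException would be raised for primitive categories the resolver does not handle. The class and method names are invented; the Hive ObjectInspector interfaces are real.

import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.*;
import org.greenplum.pxf.api.error.UnsupportedTypeException;

// Illustrative sketch only: extracts a Java value for a handful of primitive
// categories and rejects everything else with UnsupportedTypeException.
public class PrimitiveResolverSketch {

    public static Object extractPrimitive(Object obj, PrimitiveObjectInspector oi) {
        if (obj == null) {
            return null;
        }
        switch (oi.getPrimitiveCategory()) {
            case BOOLEAN:
                return ((BooleanObjectInspector) oi).get(obj);
            case INT:
                return ((IntObjectInspector) oi).get(obj);
            case LONG:
                return ((LongObjectInspector) oi).get(obj);
            case DOUBLE:
                return ((DoubleObjectInspector) oi).get(obj);
            case STRING:
                return ((StringObjectInspector) oi).getPrimitiveJavaObject(obj);
            default:
                throw new UnsupportedTypeException(
                        "Hive primitive category: " + oi.getPrimitiveCategory() + " unsupported");
        }
    }
}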
Use of org.greenplum.pxf.api.error.UnsupportedTypeException in project pxf by greenplum-db.
The class HiveResolver, method initPartitionFields:
/*
* The partition fields are initialized once, based on the userData provided
* by the fragmenter.
*/
void initPartitionFields() {
    partitionColumnNames = new HashMap<>();
    List<HivePartition> hivePartitionList = metadata.getPartitions();
    if (hivePartitionList == null || hivePartitionList.size() == 0) {
        // no partition column information
        return;
    }
    for (HivePartition partition : hivePartitionList) {
        String columnName = partition.getName();
        String type = partition.getType();
        String val = partition.getValue();
        DataType convertedType;
        Object convertedValue;
        boolean isDefaultPartition;
        // check if value is default partition
        isDefaultPartition = isDefaultPartition(type, val);
        // ignore the type's parameters
        String typeName = type.replaceAll("\\(.*\\)", "");
        switch (typeName) {
            case serdeConstants.STRING_TYPE_NAME:
                convertedType = DataType.TEXT;
                convertedValue = isDefaultPartition ? null : val;
                break;
            case serdeConstants.BOOLEAN_TYPE_NAME:
                convertedType = DataType.BOOLEAN;
                convertedValue = isDefaultPartition ? null : Boolean.valueOf(val);
                break;
            case serdeConstants.TINYINT_TYPE_NAME:
            case serdeConstants.SMALLINT_TYPE_NAME:
                convertedType = DataType.SMALLINT;
                convertedValue = isDefaultPartition ? null : Short.parseShort(val);
                break;
            case serdeConstants.INT_TYPE_NAME:
                convertedType = DataType.INTEGER;
                convertedValue = isDefaultPartition ? null : Integer.parseInt(val);
                break;
            case serdeConstants.BIGINT_TYPE_NAME:
                convertedType = DataType.BIGINT;
                convertedValue = isDefaultPartition ? null : Long.parseLong(val);
                break;
            case serdeConstants.FLOAT_TYPE_NAME:
                convertedType = DataType.REAL;
                convertedValue = isDefaultPartition ? null : Float.parseFloat(val);
                break;
            case serdeConstants.DOUBLE_TYPE_NAME:
                convertedType = DataType.FLOAT8;
                convertedValue = isDefaultPartition ? null : Double.parseDouble(val);
                break;
            case serdeConstants.TIMESTAMP_TYPE_NAME:
                convertedType = DataType.TIMESTAMP;
                convertedValue = isDefaultPartition ? null : Timestamp.valueOf(val);
                break;
            case serdeConstants.DATE_TYPE_NAME:
                convertedType = DataType.DATE;
                convertedValue = isDefaultPartition ? null : Date.valueOf(val);
                break;
            case serdeConstants.DECIMAL_TYPE_NAME:
                convertedType = DataType.NUMERIC;
                convertedValue = isDefaultPartition ? null : HiveDecimal.create(val).bigDecimalValue().toString();
                break;
            case serdeConstants.VARCHAR_TYPE_NAME:
                convertedType = DataType.VARCHAR;
                convertedValue = isDefaultPartition ? null : val;
                break;
            case serdeConstants.CHAR_TYPE_NAME:
                convertedType = DataType.BPCHAR;
                convertedValue = isDefaultPartition ? null : val;
                break;
            case serdeConstants.BINARY_TYPE_NAME:
                convertedType = DataType.BYTEA;
                convertedValue = isDefaultPartition ? null : val.getBytes();
                break;
            default:
                throw new UnsupportedTypeException("Unsupported partition type: " + type);
        }
        if (columnDescriptorContainsColumn(columnName)) {
            partitionColumnNames.put(StringUtils.lowerCase(columnName), new OneField(convertedType.getOID(), convertedValue));
        }
    }
    numberOfPartitions = partitionColumnNames.size();
}
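isDefaultPartition() governs whether a partition value is converted to null above. As a rough sketch, and only as an assumption about its behavior, the check amounts to comparing the partition value against Hive's "default partition" marker (hive.exec.default.partition.name, which defaults to __HIVE_DEFAULT_PARTITION__); the real HiveResolver may obtain the marker from the Hive configuration and apply additional conditions.

// Assumption for illustration: the marker is hard-coded to Hive's default value here.
private static final String HIVE_DEFAULT_PARTITION_NAME = "__HIVE_DEFAULT_PARTITION__";

// Sketch of an isDefaultPartition-style check: a partition whose value equals the
// marker represents a NULL partition key, so its converted value should be null.
// The type parameter is unused in this simplified version and is kept only to
// mirror the call site above.
private boolean isDefaultPartitionSketch(String type, String value) {
    return HIVE_DEFAULT_PARTITION_NAME.equals(value);
}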
Use of org.greenplum.pxf.api.error.UnsupportedTypeException in project pxf by greenplum-db.
The class HiveUtilitiesTest, method validateSchema:
@Test
public void validateSchema() {
    String columnName = "abc";
    Integer[] gpdbModifiers = {};
    hiveUtilities.validateTypeCompatible(DataType.SMALLINT, gpdbModifiers, EnumHiveToGpdbType.TinyintType.getTypeName(), columnName);
    hiveUtilities.validateTypeCompatible(DataType.SMALLINT, gpdbModifiers, EnumHiveToGpdbType.SmallintType.getTypeName(), columnName);

    // Both Hive and GPDB types have the same modifiers
    gpdbModifiers = new Integer[] { 38, 18 };
    hiveUtilities.validateTypeCompatible(DataType.NUMERIC, gpdbModifiers, "decimal(38,18)", columnName);

    // GPDB datatype doesn't require modifiers (they are empty or null), Hive has
    // non-empty modifiers; types are compatible in this case
    gpdbModifiers = new Integer[] {};
    hiveUtilities.validateTypeCompatible(DataType.NUMERIC, gpdbModifiers, "decimal(38,18)", columnName);
    gpdbModifiers = null;
    hiveUtilities.validateTypeCompatible(DataType.NUMERIC, gpdbModifiers, "decimal(38,18)", columnName);

    // GPDB has wider modifiers than Hive, types are compatible
    gpdbModifiers = new Integer[] { 11, 3 };
    hiveUtilities.validateTypeCompatible(DataType.NUMERIC, gpdbModifiers, "decimal(10,2)", columnName);

    // GPDB has narrower modifiers than Hive, types aren't compatible
    gpdbModifiers = new Integer[] { 38, 17 };
    Integer[] finalGpdbModifiers = gpdbModifiers;
    UnsupportedTypeException e = assertThrows(UnsupportedTypeException.class,
            () -> hiveUtilities.validateTypeCompatible(DataType.NUMERIC, finalGpdbModifiers, "decimal(38,18)", columnName),
            "should fail with incompatible modifiers message");
    String errorMsg = "Invalid definition for column " + columnName + ": modifiers are not compatible, "
            + Arrays.toString(new String[] { "38", "18" }) + ", " + Arrays.toString(new String[] { "38", "17" });
    assertEquals(errorMsg, e.getMessage());

    // Different types, which are not mapped to each other
    Integer[] finalGpdbModifiers1 = new Integer[] {};
    e = assertThrows(UnsupportedTypeException.class,
            () -> hiveUtilities.validateTypeCompatible(DataType.NUMERIC, finalGpdbModifiers1, "boolean", columnName),
            "should fail with incompatible types message");
    errorMsg = "Invalid definition for column " + columnName + ": expected GPDB type " + DataType.BOOLEAN
            + ", actual GPDB type " + DataType.NUMERIC;
    assertEquals(errorMsg, e.getMessage());
}
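The modifier comparisons exercised above follow a simple rule: absent GPDB modifiers are treated as unbounded, and otherwise each GPDB modifier must be at least as large as the corresponding Hive one. A minimal sketch of that rule, not the actual HiveUtilities code (the method name modifiersCompatible is invented), could look like this:

// Illustrative sketch of the compatibility rule the test cases above exercise.
static boolean modifiersCompatible(Integer[] gpdbModifiers, int[] hiveModifiers) {
    if (gpdbModifiers == null || gpdbModifiers.length == 0) {
        // an unmodified GPDB NUMERIC accepts any Hive precision/scale
        return true;
    }
    if (gpdbModifiers.length != hiveModifiers.length) {
        return false;
    }
    for (int i = 0; i < hiveModifiers.length; i++) {
        // e.g. GPDB decimal(38,17) cannot hold Hive decimal(38,18)
        if (gpdbModifiers[i] < hiveModifiers[i]) {
            return false;
        }
    }
    return true;
}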