use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.
the class DDLTask method alterTableAlterPart.
/**
 * Alter partition column type in a table
 *
 * @param db
 *          Database that contains the table whose partition column type is being altered.
 * @param alterPartitionDesc
 *          Describes the partition column type change.
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 */
private int alterTableAlterPart(Hive db, AlterTableAlterPartDesc alterPartitionDesc) throws HiveException {
  Table tbl = db.getTable(alterPartitionDesc.getTableName(), true);
  // This is checked by DDLSemanticAnalyzer
  assert (tbl.isPartitioned());
  List<FieldSchema> newPartitionKeys = new ArrayList<FieldSchema>();
  // Check if the existing partition values can be type casted to the new column type
  // with a non null value before trying to alter the partition column type.
  try {
    Set<Partition> partitions = db.getAllPartitionsOf(tbl);
    int colIndex = -1;
    for (FieldSchema col : tbl.getTTable().getPartitionKeys()) {
      colIndex++;
      if (col.getName().compareTo(alterPartitionDesc.getPartKeySpec().getName()) == 0) {
        break;
      }
    }
    if (colIndex == -1 || colIndex == tbl.getTTable().getPartitionKeys().size()) {
      throw new HiveException("Cannot find partition column " + alterPartitionDesc.getPartKeySpec().getName());
    }
    TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(alterPartitionDesc.getPartKeySpec().getType());
    ObjectInspector outputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
    Converter converter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
    // For all the existing partitions, check if the value can be type casted to a non-null object
    for (Partition part : partitions) {
      if (part.getName().equals(conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME))) {
        continue;
      }
      try {
        String value = part.getValues().get(colIndex);
        Object convertedValue = converter.convert(value);
        if (convertedValue == null) {
          throw new HiveException(" Converting from " + TypeInfoFactory.stringTypeInfo + " to " + expectedType + " for value : " + value + " resulted in NULL object");
        }
      } catch (Exception e) {
        throw new HiveException("Exception while converting " + TypeInfoFactory.stringTypeInfo + " to " + expectedType + " for value : " + part.getValues().get(colIndex));
      }
    }
  } catch (Exception e) {
    throw new HiveException("Exception while checking type conversion of existing partition values to " + alterPartitionDesc.getPartKeySpec() + " : " + e.getMessage());
  }
  for (FieldSchema col : tbl.getTTable().getPartitionKeys()) {
    if (col.getName().compareTo(alterPartitionDesc.getPartKeySpec().getName()) == 0) {
      newPartitionKeys.add(alterPartitionDesc.getPartKeySpec());
    } else {
      newPartitionKeys.add(col);
    }
  }
  tbl.getTTable().setPartitionKeys(newPartitionKeys);
  db.alterTable(tbl, null);
  work.getInputs().add(new ReadEntity(tbl));
  // We've already locked the table as the input, don't relock it as the output.
  addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
  return 0;
}
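The conversion check above can be reproduced outside of DDLTask. The following is a minimal standalone sketch that uses only the serde2 classes already shown in the snippet; the class name PartitionValueCastCheck, its method name, and the sample values are illustrative, not part of Hive.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PartitionValueCastCheck {
  public static boolean castsToType(String partitionValue, String newTypeString) {
    // Resolve the target TypeInfo from its type string (e.g. "int", "date").
    TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(newTypeString);
    // Build a converter from a Java String to the writable form of the target type.
    ObjectInspector outputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
    Converter converter = ObjectInspectorConverters.getConverter(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
    // A null result means the value cannot be represented in the new type.
    return converter.convert(partitionValue) != null;
  }

  public static void main(String[] args) {
    System.out.println(castsToType("2015-01-01", "date")); // expected: true
    System.out.println(castsToType("not-a-date", "int"));  // expected: false
  }
}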
use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.
the class VectorSerializeRow method serializeMapWrite.
private void serializeMapWrite(MapColumnVector colVector, Field field, int adjustedBatchIndex) throws IOException {
  MapTypeInfo typeInfo = (MapTypeInfo) field.typeInfo;
  MapObjectInspector objectInspector = (MapObjectInspector) field.objectInspector;
  final ColumnVector keyColumnVector = colVector.keys;
  final ColumnVector valueColumnVector = colVector.values;
  final Field keyField = field.children[0];
  final Field valueField = field.children[1];
  final int offset = (int) colVector.offsets[adjustedBatchIndex];
  final int size = (int) colVector.lengths[adjustedBatchIndex];
  final Map map = (Map) vectorExtractRow.extractRowColumn(colVector, typeInfo, objectInspector, adjustedBatchIndex);
  serializeWrite.beginMap(map);
  for (int i = 0; i < size; i++) {
    if (i > 0) {
      serializeWrite.separateKeyValuePair();
    }
    serializeWrite(keyColumnVector, keyField, offset + i);
    serializeWrite.separateKey();
    serializeWrite(valueColumnVector, valueField, offset + i);
  }
  serializeWrite.finishMap();
}
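The key/value split driven by field.children above mirrors the structure of MapTypeInfo itself: children[0] corresponds to the map key type and children[1] to the map value type. A small sketch of that decomposition using only the public serde2 typeinfo API; MapTypeInfoSketch and its method name are made up for illustration.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class MapTypeInfoSketch {
  public static List<TypeInfo> keyAndValueTypes(String mapTypeString) {
    // e.g. "map<string,int>" parses into a MapTypeInfo
    MapTypeInfo mapTypeInfo = (MapTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(mapTypeString);
    return Arrays.asList(mapTypeInfo.getMapKeyTypeInfo(), mapTypeInfo.getMapValueTypeInfo());
  }

  public static void main(String[] args) {
    System.out.println(keyAndValueTypes("map<string,int>")); // expected: [string, int]
  }
}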
use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.
the class FunctionRegistry method getMethodInternal.
/**
 * Gets the closest matching method corresponding to the argument list from a
 * list of methods.
 *
 * @param udfClass
 *          The UDF class the methods belong to (used when building exceptions).
 * @param mlist
 *          The list of methods to inspect.
 * @param exact
 *          Boolean to indicate whether this is an exact match or not.
 * @param argumentsPassed
 *          The type infos of the arguments passed in.
 * @return The matching method.
 */
public static Method getMethodInternal(Class<?> udfClass, List<Method> mlist, boolean exact, List<TypeInfo> argumentsPassed) throws UDFArgumentException {
  // result
  List<Method> udfMethods = new ArrayList<Method>();
  // The cost of the result
  int leastConversionCost = Integer.MAX_VALUE;
  for (Method m : mlist) {
    List<TypeInfo> argumentsAccepted = TypeInfoUtils.getParameterTypeInfos(m, argumentsPassed.size());
    if (argumentsAccepted == null) {
      // null means the method does not accept number of arguments passed.
      continue;
    }
    boolean match = (argumentsAccepted.size() == argumentsPassed.size());
    int conversionCost = 0;
    for (int i = 0; i < argumentsPassed.size() && match; i++) {
      int cost = matchCost(argumentsPassed.get(i), argumentsAccepted.get(i), exact);
      if (cost == -1) {
        match = false;
      } else {
        conversionCost += cost;
      }
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Method " + (match ? "did" : "didn't") + " match: passed = " + argumentsPassed + " accepted = " + argumentsAccepted + " method = " + m);
    }
    if (match) {
      // Always choose the function with least implicit conversions.
      if (conversionCost < leastConversionCost) {
        udfMethods.clear();
        udfMethods.add(m);
        leastConversionCost = conversionCost;
        // Found an exact match
        if (leastConversionCost == 0) {
          break;
        }
      } else if (conversionCost == leastConversionCost) {
        // Ambiguous call: two methods with the same number of implicit
        // conversions
        udfMethods.add(m);
        // Don't break! We might find a better match later.
      } else {
        // do nothing if implicitConversions > leastImplicitConversions
      }
    }
  }
  if (udfMethods.size() == 0) {
    // No matching methods found
    throw new NoMatchingMethodException(udfClass, argumentsPassed, mlist);
  }
  if (udfMethods.size() > 1) {
    // First try selecting methods based on the type affinity of the arguments passed
    // to the candidate method arguments.
    filterMethodsByTypeAffinity(udfMethods, argumentsPassed);
  }
  if (udfMethods.size() > 1) {
    // if the only difference is numeric types, pick the method
    // with the smallest overall numeric type.
    int lowestNumericType = Integer.MAX_VALUE;
    boolean multiple = true;
    Method candidate = null;
    List<TypeInfo> referenceArguments = null;
    for (Method m : udfMethods) {
      int maxNumericType = 0;
      List<TypeInfo> argumentsAccepted = TypeInfoUtils.getParameterTypeInfos(m, argumentsPassed.size());
      if (referenceArguments == null) {
        // keep the arguments for reference - we want all the non-numeric
        // arguments to be the same
        referenceArguments = argumentsAccepted;
      }
      Iterator<TypeInfo> referenceIterator = referenceArguments.iterator();
      for (TypeInfo accepted : argumentsAccepted) {
        TypeInfo reference = referenceIterator.next();
        boolean acceptedIsPrimitive = false;
        PrimitiveCategory acceptedPrimCat = PrimitiveCategory.UNKNOWN;
        if (accepted.getCategory() == Category.PRIMITIVE) {
          acceptedIsPrimitive = true;
          acceptedPrimCat = ((PrimitiveTypeInfo) accepted).getPrimitiveCategory();
        }
        if (acceptedIsPrimitive && TypeInfoUtils.numericTypes.containsKey(acceptedPrimCat)) {
          // We're looking for the udf with the smallest maximum numeric type.
          int typeValue = TypeInfoUtils.numericTypes.get(acceptedPrimCat);
          maxNumericType = typeValue > maxNumericType ? typeValue : maxNumericType;
        } else if (!accepted.equals(reference)) {
          // There was a non-numeric argument that did not match from one method to
          // another. We give up at this point.
          throw new AmbiguousMethodException(udfClass, argumentsPassed, mlist);
        }
      }
      if (lowestNumericType > maxNumericType) {
        multiple = false;
        lowestNumericType = maxNumericType;
        candidate = m;
      } else if (maxNumericType == lowestNumericType) {
        // multiple udfs with the same max type. Unless we find a lower one
        // we'll give up.
        multiple = true;
      }
    }
    if (!multiple) {
      return candidate;
    } else {
      throw new AmbiguousMethodException(udfClass, argumentsPassed, mlist);
    }
  }
  return udfMethods.get(0);
}
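The matching above is built on TypeInfoUtils.getParameterTypeInfos, which maps a candidate method's Java parameter classes to Hive TypeInfos (and returns null when the arity cannot be satisfied). A minimal sketch of that call in isolation; ParameterTypeInfoSketch and FakeUdf are toy names invented for the example, not Hive classes.

import java.lang.reflect.Method;
import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class ParameterTypeInfoSketch {
  // A toy "UDF-like" class used only for reflection in this sketch.
  public static class FakeUdf {
    public Text evaluate(Text s, IntWritable n) {
      return s;
    }
  }

  public static void main(String[] args) throws Exception {
    Method m = FakeUdf.class.getMethod("evaluate", Text.class, IntWritable.class);
    // Maps the Java parameter classes to Hive TypeInfos (string, int here);
    // returns null if the method cannot accept the given number of arguments.
    List<TypeInfo> accepted = TypeInfoUtils.getParameterTypeInfos(m, 2);
    System.out.println(accepted); // expected: [string, int]
  }
}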
use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.
the class MapJoinTestData method generateTypesFromTypeInfos.
private static GenerateType[] generateTypesFromTypeInfos(TypeInfo[] typeInfos) {
  final int size = typeInfos.length;
  GenerateType[] generateTypes = new GenerateType[size];
  for (int i = 0; i < size; i++) {
    PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfos[i];
    GenerateCategory category = GenerateCategory.generateCategoryFromPrimitiveCategory(primitiveTypeInfo.getPrimitiveCategory());
    generateTypes[i] = new GenerateType(category);
  }
  return generateTypes;
}
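The cast to PrimitiveTypeInfo above assumes every entry in typeInfos denotes a primitive type. A small sketch of the same pattern driven by type-name strings; PrimitiveCategorySketch and categoriesOf are illustrative names.

import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PrimitiveCategorySketch {
  public static PrimitiveCategory[] categoriesOf(String... typeNames) {
    PrimitiveCategory[] categories = new PrimitiveCategory[typeNames.length];
    for (int i = 0; i < typeNames.length; i++) {
      // Assumes every name denotes a primitive type (e.g. "int", "string", "timestamp").
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeNames[i]);
      categories[i] = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
    }
    return categories;
  }

  public static void main(String[] args) {
    for (PrimitiveCategory c : categoriesOf("int", "string", "timestamp")) {
      System.out.println(c); // expected: INT, STRING, TIMESTAMP
    }
  }
}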
use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.
the class TestMapJoinOperator method doTestString.
public void doTestString(long seed, VectorMapJoinVariation vectorMapJoinVariation) throws Exception {
  int rowCount = 10000;
  HiveConf hiveConf = new HiveConf();
  String[] bigTableColumnNames = new String[] { "b1" };
  TypeInfo[] bigTableTypeInfos = new TypeInfo[] { TypeInfoFactory.stringTypeInfo };
  int[] bigTableKeyColumnNums = new int[] { 0 };
  String[] smallTableValueColumnNames = new String[] { "sv1", "sv2" };
  TypeInfo[] smallTableValueTypeInfos = new TypeInfo[] { TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo };
  int[] bigTableRetainColumnNums = new int[] { 0 };
  int[] smallTableRetainKeyColumnNums = new int[] {};
  int[] smallTableRetainValueColumnNums = new int[] { 0, 1 };
  SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters();
  // ----------------------------------------------------------------------------------------------
  MapJoinTestDescription testDesc = new MapJoinTestDescription(hiveConf, vectorMapJoinVariation, bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, smallTableGenerationParameters);
  // Prepare data. Good for ANY implementation variation.
  MapJoinTestData testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10);
  executeTest(testDesc, testData);
}
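A hedged companion sketch: assuming the same MapJoinTestDescription, MapJoinTestData, and executeTest harness shown above, a long-keyed variation of the test would differ only in the TypeInfo choices. The method name doTestLong and the small-table value types are hypothetical, not taken from the Hive test suite.

public void doTestLong(long seed, VectorMapJoinVariation vectorMapJoinVariation) throws Exception {
  int rowCount = 10000;
  HiveConf hiveConf = new HiveConf();
  String[] bigTableColumnNames = new String[] { "b1" };
  // Only the key and value TypeInfos change relative to doTestString.
  TypeInfo[] bigTableTypeInfos = new TypeInfo[] { TypeInfoFactory.longTypeInfo };
  int[] bigTableKeyColumnNums = new int[] { 0 };
  String[] smallTableValueColumnNames = new String[] { "sv1", "sv2" };
  TypeInfo[] smallTableValueTypeInfos = new TypeInfo[] { TypeInfoFactory.stringTypeInfo, TypeInfoFactory.doubleTypeInfo };
  int[] bigTableRetainColumnNums = new int[] { 0 };
  int[] smallTableRetainKeyColumnNums = new int[] {};
  int[] smallTableRetainValueColumnNums = new int[] { 0, 1 };
  SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters();
  MapJoinTestDescription testDesc = new MapJoinTestDescription(hiveConf, vectorMapJoinVariation, bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, smallTableGenerationParameters);
  MapJoinTestData testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10);
  executeTest(testDesc, testData);
}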