use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project presto by prestodb.
the class OrcTester method preprocessWriteValueOld.
private static Object preprocessWriteValueOld(TypeInfo typeInfo, Object value) {
    if (value == null) {
        return null;
    }
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            PrimitiveObjectInspector.PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
            switch (primitiveCategory) {
                case BOOLEAN:
                    return value;
                case BYTE:
                    return ((Number) value).byteValue();
                case SHORT:
                    return ((Number) value).shortValue();
                case INT:
                    return ((Number) value).intValue();
                case LONG:
                    return ((Number) value).longValue();
                case FLOAT:
                    return ((Number) value).floatValue();
                case DOUBLE:
                    return ((Number) value).doubleValue();
                case DECIMAL:
                    return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());
                case STRING:
                    return value;
                case CHAR:
                    return new HiveChar(value.toString(), ((CharTypeInfo) typeInfo).getLength());
                case DATE:
                    int days = ((SqlDate) value).getDays();
                    LocalDate localDate = LocalDate.ofEpochDay(days);
                    ZonedDateTime zonedDateTime = localDate.atStartOfDay(ZoneId.systemDefault());
                    long millis = zonedDateTime.toEpochSecond() * 1000;
                    Date date = new Date(0);
                    // millis must be set separately to avoid masking
                    date.setTime(millis);
                    return date;
                case TIMESTAMP:
                    long millisUtc = ((SqlTimestamp) value).getMillisUtc();
                    return new Timestamp(millisUtc);
                case BINARY:
                    return ((SqlVarbinary) value).getBytes();
            }
            break;
        case MAP:
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
            TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
            Map<Object, Object> newMap = new HashMap<>();
            for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
                newMap.put(preprocessWriteValueOld(keyTypeInfo, entry.getKey()), preprocessWriteValueOld(valueTypeInfo, entry.getValue()));
            }
            return newMap;
        case LIST:
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
            List<Object> newList = new ArrayList<>(((Collection<?>) value).size());
            for (Object element : (Iterable<?>) value) {
                newList.add(preprocessWriteValueOld(elementTypeInfo, element));
            }
            return newList;
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            List<?> fieldValues = (List<?>) value;
            List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
            List<Object> newStruct = new ArrayList<>();
            for (int fieldId = 0; fieldId < fieldValues.size(); fieldId++) {
                newStruct.add(preprocessWriteValueOld(fieldTypeInfos.get(fieldId), fieldValues.get(fieldId)));
            }
            return newStruct;
    }
    throw new PrestoException(NOT_SUPPORTED, format("Unsupported Hive type: %s", typeInfo));
}
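A minimal standalone sketch (not part of OrcTester) of the PrimitiveTypeInfo lookup that the switch above relies on: resolve a PrimitiveTypeInfo from a Hive type name and branch on its primitive category. The chosen type names and the printed output are illustrative only.
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PrimitiveCategorySketch {
    public static void main(String[] args) {
        // Resolve a PrimitiveTypeInfo by Hive type name and inspect its category,
        // the same dispatch preprocessWriteValueOld performs per primitive value.
        for (String typeName : new String[] {"boolean", "int", "string", "date", "timestamp"}) {
            PrimitiveTypeInfo typeInfo = TypeInfoFactory.getPrimitiveTypeInfo(typeName);
            PrimitiveCategory category = typeInfo.getPrimitiveCategory();
            System.out.println(typeName + " -> " + category);
        }
    }
}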
use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
the class DDLSemanticAnalyzer method getFullPartitionSpecs.
/**
 * Get the partition specs from the tree. This stores the full specification
 * with the comparison operator into the output list.
 *
 * @param ast Tree to extract partitions from.
 * @param tab Table.
 * @return Map of partitions by prefix length. Most of the time prefix length will
 *         be the same for all partition specs, so we can just OR the expressions.
 */
private Map<Integer, List<ExprNodeGenericFuncDesc>> getFullPartitionSpecs(CommonTree ast, Table tab, boolean canGroupExprs) throws SemanticException {
    String defaultPartitionName = HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULTPARTITIONNAME);
    Map<String, String> colTypes = new HashMap<String, String>();
    for (FieldSchema fs : tab.getPartitionKeys()) {
        colTypes.put(fs.getName().toLowerCase(), fs.getType());
    }
    Map<Integer, List<ExprNodeGenericFuncDesc>> result = new HashMap<Integer, List<ExprNodeGenericFuncDesc>>();
    for (int childIndex = 0; childIndex < ast.getChildCount(); childIndex++) {
        Tree partSpecTree = ast.getChild(childIndex);
        if (partSpecTree.getType() != HiveParser.TOK_PARTSPEC) {
            continue;
        }
        ExprNodeGenericFuncDesc expr = null;
        HashSet<String> names = new HashSet<String>(partSpecTree.getChildCount());
        for (int i = 0; i < partSpecTree.getChildCount(); ++i) {
            CommonTree partSpecSingleKey = (CommonTree) partSpecTree.getChild(i);
            assert (partSpecSingleKey.getType() == HiveParser.TOK_PARTVAL);
            String key = stripIdentifierQuotes(partSpecSingleKey.getChild(0).getText()).toLowerCase();
            String operator = partSpecSingleKey.getChild(1).getText();
            ASTNode partValNode = (ASTNode) partSpecSingleKey.getChild(2);
            TypeCheckCtx typeCheckCtx = new TypeCheckCtx(null);
            ExprNodeConstantDesc valExpr = (ExprNodeConstantDesc) TypeCheckProcFactory.genExprNode(partValNode, typeCheckCtx).get(partValNode);
            Object val = valExpr.getValue();
            boolean isDefaultPartitionName = val.equals(defaultPartitionName);
            String type = colTypes.get(key);
            if (type == null) {
                throw new SemanticException("Column " + key + " not found");
            }
            PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo(type);
            // Create the corresponding hive expression to filter on partition columns.
            if (!isDefaultPartitionName) {
                if (!valExpr.getTypeString().equals(type)) {
                    Converter converter = ObjectInspectorConverters.getConverter(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(valExpr.getTypeInfo()), TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(pti));
                    val = converter.convert(valExpr.getValue());
                }
            }
            ExprNodeColumnDesc column = new ExprNodeColumnDesc(pti, key, null, true);
            ExprNodeGenericFuncDesc op;
            if (!isDefaultPartitionName) {
                op = makeBinaryPredicate(operator, column, new ExprNodeConstantDesc(pti, val));
            } else {
                GenericUDF originalOp = FunctionRegistry.getFunctionInfo(operator).getGenericUDF();
                String fnName;
                if (FunctionRegistry.isEq(originalOp)) {
                    fnName = "isnull";
                } else if (FunctionRegistry.isNeq(originalOp)) {
                    fnName = "isnotnull";
                } else {
                    throw new SemanticException("Cannot use " + operator + " in a default partition spec; only '=' and '!=' are allowed.");
                }
                op = makeUnaryPredicate(fnName, column);
            }
            // If it's multi-expr filter (e.g. a='5', b='2012-01-02'), AND with previous exprs.
            expr = (expr == null) ? op : makeBinaryPredicate("and", expr, op);
            names.add(key);
        }
        if (expr == null) {
            continue;
        }
        // We got the expr for one full partition spec. Determine the prefix length.
        int prefixLength = calculatePartPrefix(tab, names);
        List<ExprNodeGenericFuncDesc> orExpr = result.get(prefixLength);
        // If an expression already exists for this prefix length, OR it in (when grouping is allowed);
        // otherwise create a new separate filter. In most cases there will only be one.
        if (orExpr == null) {
            result.put(prefixLength, Lists.newArrayList(expr));
        } else if (canGroupExprs) {
            orExpr.set(0, makeBinaryPredicate("or", expr, orExpr.get(0)));
        } else {
            orExpr.add(expr);
        }
    }
    return result;
}
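A hedged sketch of the conversion step above taken in isolation: when a literal in the partition spec does not match the partition column's declared type, a standard-Java ObjectInspector converter coerces it. The 'int' column type and the string literal "42" are made-up inputs.
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PartitionValueConversionSketch {
    public static void main(String[] args) {
        // Declared partition column type (illustrative).
        PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo("int");
        // Literals parsed from a partition spec arrive as strings.
        Converter converter = ObjectInspectorConverters.getConverter(
                TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(TypeInfoFactory.stringTypeInfo),
                TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(pti));
        Object converted = converter.convert("42");
        // Expected: an Integer with value 42, matching the column's declared type.
        System.out.println(converted + " (" + converted.getClass().getSimpleName() + ")");
    }
}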
use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
the class Vectorizer method createVectorPTFInfo.
/*
 * Create the additional vectorization PTF information needed by the VectorPTFOperator during
 * execution.
 */
private static VectorPTFInfo createVectorPTFInfo(Operator<? extends OperatorDesc> ptfOp, PTFDesc ptfDesc, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc) throws HiveException {
    PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
    ArrayList<ColumnInfo> outputSignature = ptfOp.getSchema().getSignature();
    final int outputSize = outputSignature.size();
    boolean isPartitionOrderBy = vectorPTFDesc.getIsPartitionOrderBy();
    ExprNodeDesc[] orderExprNodeDescs = vectorPTFDesc.getOrderExprNodeDescs();
    ExprNodeDesc[] partitionExprNodeDescs = vectorPTFDesc.getPartitionExprNodeDescs();
    String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames();
    final int evaluatorCount = evaluatorFunctionNames.length;
    WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
    List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists();
    /*
     * Output columns.
     */
    int[] outputColumnProjectionMap = new int[outputSize];
    // Evaluator results are first.
    for (int i = 0; i < evaluatorCount; i++) {
        ColumnInfo colInfo = outputSignature.get(i);
        TypeInfo typeInfo = colInfo.getType();
        final int outputColumnNum;
        outputColumnNum = vContext.allocateScratchColumn(typeInfo);
        outputColumnProjectionMap[i] = outputColumnNum;
    }
    // Followed by key and non-key input columns (some may be missing).
    for (int i = evaluatorCount; i < outputSize; i++) {
        ColumnInfo colInfo = outputSignature.get(i);
        outputColumnProjectionMap[i] = vContext.getInputColumnIndex(colInfo.getInternalName());
    }
    /*
     * Partition and order by.
     */
    int[] partitionColumnMap;
    Type[] partitionColumnVectorTypes;
    VectorExpression[] partitionExpressions;
    if (!isPartitionOrderBy) {
        partitionColumnMap = null;
        partitionColumnVectorTypes = null;
        partitionExpressions = null;
    } else {
        final int partitionKeyCount = partitionExprNodeDescs.length;
        partitionColumnMap = new int[partitionKeyCount];
        partitionColumnVectorTypes = new Type[partitionKeyCount];
        partitionExpressions = new VectorExpression[partitionKeyCount];
        for (int i = 0; i < partitionKeyCount; i++) {
            VectorExpression partitionExpression = vContext.getVectorExpression(partitionExprNodeDescs[i]);
            TypeInfo typeInfo = partitionExpression.getOutputTypeInfo();
            Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
            partitionColumnVectorTypes[i] = columnVectorType;
            partitionColumnMap[i] = partitionExpression.getOutputColumnNum();
            partitionExpressions[i] = partitionExpression;
        }
    }
    final int orderKeyCount = orderExprNodeDescs.length;
    int[] orderColumnMap = new int[orderKeyCount];
    Type[] orderColumnVectorTypes = new Type[orderKeyCount];
    VectorExpression[] orderExpressions = new VectorExpression[orderKeyCount];
    for (int i = 0; i < orderKeyCount; i++) {
        VectorExpression orderExpression = vContext.getVectorExpression(orderExprNodeDescs[i]);
        TypeInfo typeInfo = orderExpression.getOutputTypeInfo();
        Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
        orderColumnVectorTypes[i] = columnVectorType;
        orderColumnMap[i] = orderExpression.getOutputColumnNum();
        orderExpressions[i] = orderExpression;
    }
    ArrayList<Integer> keyInputColumns = new ArrayList<Integer>();
    ArrayList<Integer> nonKeyInputColumns = new ArrayList<Integer>();
    determineKeyAndNonKeyInputColumnMap(outputColumnProjectionMap, isPartitionOrderBy, orderColumnMap, partitionColumnMap, evaluatorCount, keyInputColumns, nonKeyInputColumns);
    int[] keyInputColumnMap = ArrayUtils.toPrimitive(keyInputColumns.toArray(new Integer[0]));
    int[] nonKeyInputColumnMap = ArrayUtils.toPrimitive(nonKeyInputColumns.toArray(new Integer[0]));
    VectorExpression[] evaluatorInputExpressions = new VectorExpression[evaluatorCount];
    Type[] evaluatorInputColumnVectorTypes = new Type[evaluatorCount];
    for (int i = 0; i < evaluatorCount; i++) {
        String functionName = evaluatorFunctionNames[i];
        WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i];
        SupportedFunctionType functionType = VectorPTFDesc.supportedFunctionsMap.get(functionName);
        List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
        VectorExpression inputVectorExpression;
        final Type columnVectorType;
        if (exprNodeDescList != null) {
            // Validation has limited evaluatorInputExprNodeDescLists to size 1.
            ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
            // Determine input vector expression using the VectorizationContext.
            inputVectorExpression = vContext.getVectorExpression(exprNodeDesc);
            TypeInfo typeInfo = exprNodeDesc.getTypeInfo();
            PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
            columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
        } else {
            inputVectorExpression = null;
            columnVectorType = ColumnVector.Type.NONE;
        }
        evaluatorInputExpressions[i] = inputVectorExpression;
        evaluatorInputColumnVectorTypes[i] = columnVectorType;
    }
    VectorPTFInfo vectorPTFInfo = new VectorPTFInfo();
    vectorPTFInfo.setOutputColumnMap(outputColumnProjectionMap);
    vectorPTFInfo.setPartitionColumnMap(partitionColumnMap);
    vectorPTFInfo.setPartitionColumnVectorTypes(partitionColumnVectorTypes);
    vectorPTFInfo.setPartitionExpressions(partitionExpressions);
    vectorPTFInfo.setOrderColumnMap(orderColumnMap);
    vectorPTFInfo.setOrderColumnVectorTypes(orderColumnVectorTypes);
    vectorPTFInfo.setOrderExpressions(orderExpressions);
    vectorPTFInfo.setEvaluatorInputExpressions(evaluatorInputExpressions);
    vectorPTFInfo.setEvaluatorInputColumnVectorTypes(evaluatorInputColumnVectorTypes);
    vectorPTFInfo.setKeyInputColumnMap(keyInputColumnMap);
    vectorPTFInfo.setNonKeyInputColumnMap(nonKeyInputColumnMap);
    return vectorPTFInfo;
}
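A small standalone sketch (not part of Vectorizer) of the TypeInfo-to-ColumnVector.Type translation used above for the partition, order, and evaluator input columns; the sample TypeInfos and the expected mappings in the trailing comment are illustrative.
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ColumnVectorTypeSketch {
    public static void main(String[] args) throws HiveException {
        // Each expression's output TypeInfo determines the vector type of the batch column carrying it.
        TypeInfo[] typeInfos = {
                TypeInfoFactory.longTypeInfo,
                TypeInfoFactory.stringTypeInfo,
                TypeInfoFactory.timestampTypeInfo
        };
        for (TypeInfo typeInfo : typeInfos) {
            ColumnVector.Type vectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
            System.out.println(typeInfo.getTypeName() + " -> " + vectorType);
        }
        // Expected along the lines of: bigint -> LONG, string -> BYTES, timestamp -> TIMESTAMP.
    }
}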
use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
the class Vectorizer method specializeMapJoinOperator.
Operator<? extends OperatorDesc> specializeMapJoinOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, MapJoinDesc desc, VectorMapJoinDesc vectorDesc) throws HiveException {
    Operator<? extends OperatorDesc> vectorOp = null;
    Class<? extends Operator<?>> opClass = null;
    VectorMapJoinInfo vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo();
    HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE;
    HashTableKind hashTableKind = HashTableKind.NONE;
    HashTableKeyType hashTableKeyType = HashTableKeyType.NONE;
    VectorMapJoinVariation vectorMapJoinVariation = VectorMapJoinVariation.NONE;
    if (vectorDesc.getIsFastHashTableEnabled()) {
        hashTableImplementationType = HashTableImplementationType.FAST;
    } else {
        hashTableImplementationType = HashTableImplementationType.OPTIMIZED;
    }
    int joinType = desc.getConds()[0].getType();
    boolean isInnerBigOnly = false;
    if (joinType == JoinDesc.INNER_JOIN && isBigTableOnlyResults(desc)) {
        isInnerBigOnly = true;
    }
    // By default, we can always use the multi-key class.
    hashTableKeyType = HashTableKeyType.MULTI_KEY;
    if (!HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MULTIKEY_ONLY_ENABLED)) {
        // Look for single column optimization.
        byte posBigTable = (byte) desc.getPosBigTable();
        Map<Byte, List<ExprNodeDesc>> keyExprs = desc.getKeys();
        List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get(posBigTable);
        if (bigTableKeyExprs.size() == 1) {
            TypeInfo typeInfo = bigTableKeyExprs.get(0).getTypeInfo();
            LOG.info("Vectorizer vectorizeOperator map join typeName " + typeInfo.getTypeName());
            switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
                case BOOLEAN:
                    hashTableKeyType = HashTableKeyType.BOOLEAN;
                    break;
                case BYTE:
                    hashTableKeyType = HashTableKeyType.BYTE;
                    break;
                case SHORT:
                    hashTableKeyType = HashTableKeyType.SHORT;
                    break;
                case INT:
                    hashTableKeyType = HashTableKeyType.INT;
                    break;
                case LONG:
                    hashTableKeyType = HashTableKeyType.LONG;
                    break;
                case STRING:
                case CHAR:
                case VARCHAR:
                case BINARY:
                    hashTableKeyType = HashTableKeyType.STRING;
                    break;
                default:
                    // Other key categories (dates, decimals, timestamps, ...) stay with MULTI_KEY.
            }
        }
    }
    switch (joinType) {
        case JoinDesc.INNER_JOIN:
            if (!isInnerBigOnly) {
                vectorMapJoinVariation = VectorMapJoinVariation.INNER;
                hashTableKind = HashTableKind.HASH_MAP;
            } else {
                vectorMapJoinVariation = VectorMapJoinVariation.INNER_BIG_ONLY;
                hashTableKind = HashTableKind.HASH_MULTISET;
            }
            break;
        case JoinDesc.LEFT_OUTER_JOIN:
        case JoinDesc.RIGHT_OUTER_JOIN:
            vectorMapJoinVariation = VectorMapJoinVariation.OUTER;
            hashTableKind = HashTableKind.HASH_MAP;
            break;
        case JoinDesc.LEFT_SEMI_JOIN:
            vectorMapJoinVariation = VectorMapJoinVariation.LEFT_SEMI;
            hashTableKind = HashTableKind.HASH_SET;
            break;
        default:
            throw new HiveException("Unknown join type " + joinType);
    }
    LOG.info("Vectorizer vectorizeOperator map join hashTableKind " + hashTableKind.name() + " hashTableKeyType " + hashTableKeyType.name());
    switch (hashTableKeyType) {
        case BOOLEAN:
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
            switch (vectorMapJoinVariation) {
                case INNER:
                    opClass = VectorMapJoinInnerLongOperator.class;
                    break;
                case INNER_BIG_ONLY:
                    opClass = VectorMapJoinInnerBigOnlyLongOperator.class;
                    break;
                case LEFT_SEMI:
                    opClass = VectorMapJoinLeftSemiLongOperator.class;
                    break;
                case OUTER:
                    opClass = VectorMapJoinOuterLongOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown operator variation " + vectorMapJoinVariation);
            }
            break;
        case STRING:
            switch (vectorMapJoinVariation) {
                case INNER:
                    opClass = VectorMapJoinInnerStringOperator.class;
                    break;
                case INNER_BIG_ONLY:
                    opClass = VectorMapJoinInnerBigOnlyStringOperator.class;
                    break;
                case LEFT_SEMI:
                    opClass = VectorMapJoinLeftSemiStringOperator.class;
                    break;
                case OUTER:
                    opClass = VectorMapJoinOuterStringOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown operator variation " + vectorMapJoinVariation);
            }
            break;
        case MULTI_KEY:
            switch (vectorMapJoinVariation) {
                case INNER:
                    opClass = VectorMapJoinInnerMultiKeyOperator.class;
                    break;
                case INNER_BIG_ONLY:
                    opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class;
                    break;
                case LEFT_SEMI:
                    opClass = VectorMapJoinLeftSemiMultiKeyOperator.class;
                    break;
                case OUTER:
                    opClass = VectorMapJoinOuterMultiKeyOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown operator variation " + vectorMapJoinVariation);
            }
            break;
        default:
            throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
    }
    boolean minMaxEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED);
    vectorDesc.setHashTableImplementationType(hashTableImplementationType);
    vectorDesc.setHashTableKind(hashTableKind);
    vectorDesc.setHashTableKeyType(hashTableKeyType);
    vectorDesc.setVectorMapJoinVariation(vectorMapJoinVariation);
    vectorDesc.setMinMaxEnabled(minMaxEnabled);
    vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);
    vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), op.getConf(), vContext, vectorDesc);
    LOG.info("Vectorizer vectorizeOperator map join class " + vectorOp.getClass().getSimpleName());
    return vectorOp;
}
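The single-key specialization above, restated as a hedged standalone helper: given the lone big-table join key's TypeInfo, pick the HashTableKeyType, falling back to MULTI_KEY for anything else. The helper name is hypothetical and it assumes the same imports as Vectorizer (TypeInfo, PrimitiveTypeInfo, VectorMapJoinDesc.HashTableKeyType).
// Hypothetical helper mirroring the key-type selection above; not part of Vectorizer.
private static HashTableKeyType chooseHashTableKeyType(TypeInfo typeInfo) {
    if (!(typeInfo instanceof PrimitiveTypeInfo)) {
        return HashTableKeyType.MULTI_KEY;
    }
    switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
        case BOOLEAN:
            return HashTableKeyType.BOOLEAN;
        case BYTE:
            return HashTableKeyType.BYTE;
        case SHORT:
            return HashTableKeyType.SHORT;
        case INT:
            return HashTableKeyType.INT;
        case LONG:
            return HashTableKeyType.LONG;
        case STRING:
        case CHAR:
        case VARCHAR:
        case BINARY:
            return HashTableKeyType.STRING;
        default:
            // Dates, decimals, timestamps, etc. keep the generic multi-key path.
            return HashTableKeyType.MULTI_KEY;
    }
}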
use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.
the class DropPartitionHandler method genPartSpecs.
private Map<Integer, List<ExprNodeGenericFuncDesc>> genPartSpecs(Table table, List<Map<String, String>> partitions) throws SemanticException {
    Map<Integer, List<ExprNodeGenericFuncDesc>> partSpecs = new HashMap<>();
    int partPrefixLength = 0;
    if (partitions.size() > 0) {
        partPrefixLength = partitions.get(0).size();
        // Pick the length of the first ptn; we expect all ptns listed to have the same number of
        // key-vals.
    }
    List<ExprNodeGenericFuncDesc> partitionDesc = new ArrayList<>();
    for (Map<String, String> ptn : partitions) {
        // Convert each key-value map to an appropriate expression.
        ExprNodeGenericFuncDesc expr = null;
        for (Map.Entry<String, String> kvp : ptn.entrySet()) {
            String key = kvp.getKey();
            Object val = kvp.getValue();
            String type = table.getPartColByName(key).getType();
            PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo(type);
            ExprNodeColumnDesc column = new ExprNodeColumnDesc(pti, key, null, true);
            ExprNodeGenericFuncDesc op = DDLSemanticAnalyzer.makeBinaryPredicate("=", column, new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, val));
            expr = (expr == null) ? op : DDLSemanticAnalyzer.makeBinaryPredicate("and", expr, op);
        }
        if (expr != null) {
            partitionDesc.add(expr);
        }
    }
    if (partitionDesc.size() > 0) {
        partSpecs.put(partPrefixLength, partitionDesc);
    }
    return partSpecs;
}
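A hedged usage sketch for genPartSpecs: for a single listed partition (ds='2024-01-01', hr='00'), the result maps the common prefix length 2 to one AND-ed equality expression. The table variable and partition values are hypothetical, and the call assumes the surrounding DropPartitionHandler context.
// Hypothetical call site; assumes a Table whose partition keys are ds and hr (both strings).
List<Map<String, String>> partitions = new ArrayList<>();
Map<String, String> ptn = new LinkedHashMap<>();
ptn.put("ds", "2024-01-01");
ptn.put("hr", "00");
partitions.add(ptn);
Map<Integer, List<ExprNodeGenericFuncDesc>> partSpecs = genPartSpecs(table, partitions);
// partSpecs.get(2) holds one expression equivalent to: (ds = '2024-01-01') and (hr = '00')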