Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.
From the class TestAccumuloRowSerializer, method testMapSerialization.
@Test
public void testMapSerialization() throws IOException, SerDeException {
  List<String> columns = Arrays.asList("row", "col");
  List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo));
  List<String> typeNames = new ArrayList<String>(types.size());
  for (TypeInfo type : types) {
    typeNames.add(type.getTypeName());
  }
  Properties tableProperties = new Properties();
  tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:*");
  tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
  tableProperties.setProperty(serdeConstants.COLLECTION_DELIM, ",");
  tableProperties.setProperty(serdeConstants.MAPKEY_DELIM, ":");
  tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
  tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(typeNames));
  AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
  LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
  TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
  LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(stringTypeInfo, new byte[] { 0 }, 0, serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
  LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(stringOI, stringOI, (byte) ',', (byte) ':', serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
  LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(columns, Arrays.asList(stringOI, mapOI), (byte) ' ', serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
  AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility(), accumuloSerDeParams.getRowIdFactory());
  // Create the LazyStruct from the LazyStruct...Inspector
  LazyStruct obj = (LazyStruct) LazyFactory.createLazyObject(structOI);
  ByteArrayRef byteRef = new ByteArrayRef();
  byteRef.setData("row1 cq1:10,cq2:20,cq3:value".getBytes());
  obj.init(byteRef, 0, byteRef.getData().length);
  Mutation m = (Mutation) serializer.serialize(obj, structOI);
  Assert.assertArrayEquals("row1".getBytes(), m.getRow());
  List<ColumnUpdate> updates = m.getUpdates();
  Assert.assertEquals(3, updates.size());
  ColumnUpdate update = updates.get(0);
  Assert.assertEquals("cf", new String(update.getColumnFamily()));
  Assert.assertEquals("cq1", new String(update.getColumnQualifier()));
  Assert.assertEquals("10", new String(update.getValue()));
  update = updates.get(1);
  Assert.assertEquals("cf", new String(update.getColumnFamily()));
  Assert.assertEquals("cq2", new String(update.getColumnQualifier()));
  Assert.assertEquals("20", new String(update.getValue()));
  update = updates.get(2);
  Assert.assertEquals("cf", new String(update.getColumnFamily()));
  Assert.assertEquals("cq3", new String(update.getColumnQualifier()));
  Assert.assertEquals("value", new String(update.getValue()));
}
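For reference, the map type built from TypeInfoFactory.stringTypeInfo in this test is what yields the "map<string,string>" type string joined into LIST_COLUMN_TYPES. A minimal standalone sketch (the class name here is illustrative, not part of the test):

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class TypeNameSketch {
  public static void main(String[] args) {
    TypeInfo mapType = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
    // getTypeName() returns the Hive type string that the test places in LIST_COLUMN_TYPES.
    System.out.println(TypeInfoFactory.stringTypeInfo.getTypeName()); // prints "string"
    System.out.println(mapType.getTypeName()); // prints "map<string,string>"
  }
}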
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.
From the class TestLazyAccumuloMap, method testStringMapWithProjection.
@Test
public void testStringMapWithProjection() throws SerDeException {
  AccumuloHiveRow row = new AccumuloHiveRow("row");
  row.add("cf1", "foo", "bar".getBytes());
  row.add("cf1", "bar", "foo".getBytes());
  row.add("cf2", "foo1", "bar1".getBytes());
  row.add("cf3", "bar1", "foo1".getBytes());
  HiveAccumuloMapColumnMapping mapping = new HiveAccumuloMapColumnMapping("cf1", null, ColumnEncoding.STRING, ColumnEncoding.STRING, "column", TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo).toString());
  // Map of String to String
  Text nullSequence = new Text("\\N");
  ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0), new byte[] { (byte) 1, (byte) 2 }, 0, nullSequence, false, (byte) 0);
  LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi);
  map.init(row, mapping);
  Assert.assertEquals(2, map.getMapSize());
  Object o = map.getMapValueElement(new Text("foo"));
  Assert.assertNotNull(o);
  Assert.assertEquals(new Text("bar"), ((LazyString) o).getWritableObject());
  o = map.getMapValueElement(new Text("bar"));
  Assert.assertNotNull(o);
  Assert.assertEquals(new Text("foo"), ((LazyString) o).getWritableObject());
}
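The test builds its map TypeInfo two ways: programmatically via TypeInfoFactory, and by parsing the type string with TypeInfoUtils. The two forms describe the same type; a small sketch (class name illustrative):

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class MapTypeInfoSketch {
  public static void main(String[] args) {
    // Built from the string primitive TypeInfo, as HiveAccumuloMapColumnMapping is given above...
    TypeInfo built = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
    // ...or parsed from the Hive type string, as the LazyObjectInspector is given above.
    TypeInfo parsed = TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0);
    System.out.println(built.equals(parsed)); // should print true: both describe map<string,string>
  }
}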
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.
From the class SemanticAnalyzer, method handleInsertStatementSpec.
/**
 * This modifies the Select projections when the Select is part of an insert statement and
 * the insert statement specifies a column list for the target table, e.g.
 * create table source (a int, b int);
 * create table target (x int, y int, z int);
 * insert into target(z,x) select * from source
 *
 * Once the * is resolved to 'a,b', this list needs to be rewritten to 'b,null,a' so that it looks
 * as if the original query was written as
 * insert into target select b, null, a from source
 *
 * If the target schema is not specified, this is a no-op.
 *
 * @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx)
 * @throws SemanticException
 */
RowResolver handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest, RowResolver outputRR, QB qb, ASTNode selExprList) throws SemanticException {
  // (z,x)
  // specified in the query
  List<String> targetTableSchema = qb.getParseInfo().getDestSchemaForClause(dest);
  if (targetTableSchema == null) {
    // no insert schema was specified
    return outputRR;
  }
  if (targetTableSchema.size() != col_list.size()) {
    Table target = qb.getMetaData().getDestTableForAlias(dest);
    Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
    throw new SemanticException(generateErrorMessage(selExprList, "Expected " + targetTableSchema.size() + " columns for " + dest + (target != null ? "/" + target.getCompleteName() : (partition != null ? "/" + partition.getCompleteName() : "")) + "; select produces " + col_list.size() + " columns"));
  }
  // e.g. map z->expr for a
  Map<String, ExprNodeDesc> targetCol2Projection = new HashMap<String, ExprNodeDesc>();
  // e.g. map z->ColumnInfo for a
  Map<String, ColumnInfo> targetCol2ColumnInfo = new HashMap<String, ColumnInfo>();
  int colListPos = 0;
  for (String targetCol : targetTableSchema) {
    targetCol2ColumnInfo.put(targetCol, outputRR.getColumnInfos().get(colListPos));
    targetCol2Projection.put(targetCol, col_list.get(colListPos++));
  }
  Table target = qb.getMetaData().getDestTableForAlias(dest);
  Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
  if (target == null && partition == null) {
    throw new SemanticException(generateErrorMessage(selExprList, "No table/partition found in QB metadata for dest='" + dest + "'"));
  }
  List<ExprNodeDesc> newColList = new ArrayList<ExprNodeDesc>();
  colListPos = 0;
  List<FieldSchema> targetTableCols = target != null ? target.getCols() : partition.getCols();
  List<String> targetTableColNames = new ArrayList<String>();
  List<TypeInfo> targetTableColTypes = new ArrayList<TypeInfo>();
  for (FieldSchema fs : targetTableCols) {
    targetTableColNames.add(fs.getName());
    targetTableColTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()));
  }
  Map<String, String> partSpec = qb.getMetaData().getPartSpecForAlias(dest);
  if (partSpec != null) {
    // relies on consistent order via LinkedHashMap
    for (Map.Entry<String, String> partKeyVal : partSpec.entrySet()) {
      if (partKeyVal.getValue() == null) {
        // these must be after non-partition cols
        targetTableColNames.add(partKeyVal.getKey());
        targetTableColTypes.add(TypeInfoFactory.stringTypeInfo);
      }
    }
  }
  // now make the select produce <regular columns>,<dynamic partition columns>,
  // where missing columns are NULL-filled
  Table tbl = target == null ? partition.getTable() : target;
  RowResolver newOutputRR = getColForInsertStmtSpec(targetCol2Projection, tbl, targetCol2ColumnInfo, colListPos, targetTableColTypes, newColList, targetTableColNames);
  col_list.clear();
  col_list.addAll(newColList);
  return newOutputRR;
}
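A simplified, standalone sketch of the reordering the javadoc describes (the helper and class names are hypothetical, not the Hive implementation): projections are matched to the insert column list by name, then re-emitted in the target table's column order with NULL for unspecified columns.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class InsertSpecSketch {
  // Hypothetical helper: reorder select expressions (represented here as strings) to match
  // the target table's column order, filling columns not named in the insert spec with null.
  static List<String> reorder(List<String> targetTableCols, List<String> insertSpecCols, List<String> selectExprs) {
    Map<String, String> byTargetCol = new HashMap<>();
    for (int i = 0; i < insertSpecCols.size(); i++) {
      byTargetCol.put(insertSpecCols.get(i), selectExprs.get(i));
    }
    List<String> rewritten = new ArrayList<>();
    for (String col : targetTableCols) {
      rewritten.add(byTargetCol.getOrDefault(col, "null"));
    }
    return rewritten;
  }

  public static void main(String[] args) {
    // insert into target(z,x) select * from source, with * resolved to a,b and target columns x,y,z
    System.out.println(reorder(Arrays.asList("x", "y", "z"), Arrays.asList("z", "x"), Arrays.asList("a", "b")));
    // prints [b, null, a], matching the rewritten query in the javadoc
  }
}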
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.
From the class AlterTableAlterPartitionOperation, method checkPartitionValues.
/**
 * Check that the existing partition values can be type-cast to the new column type,
 * yielding a non-null value, before trying to alter the partition column type.
 */
private void checkPartitionValues(Table tbl, int colIndex) throws HiveException {
  TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(desc.getPartKeyType());
  ObjectInspector outputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
  Converter converter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
  Set<Partition> partitions = context.getDb().getAllPartitionsOf(tbl);
  for (Partition part : partitions) {
    if (part.getName().equals(context.getConf().getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME))) {
      continue;
    }
    try {
      String value = part.getValues().get(colIndex);
      Object convertedValue = converter.convert(value);
      if (convertedValue == null) {
        throw new HiveException(" Converting from " + TypeInfoFactory.stringTypeInfo + " to " + expectedType + " for value : " + value + " resulted in NULL object");
      }
    } catch (Exception e) {
      throw new HiveException("Exception while converting " + TypeInfoFactory.stringTypeInfo + " to " + expectedType + " for value : " + part.getValues().get(colIndex));
    }
  }
}
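To see what the null check above guards against, here is a minimal standalone sketch (class name illustrative, and assuming the standard Hive primitive converter returns null for string values it cannot parse, which is the case the method rejects):

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PartitionValueCastSketch {
  public static void main(String[] args) {
    // Same converter setup as checkPartitionValues: partition values are stored as strings,
    // and must convert cleanly to the new column type (int here).
    TypeInfo expectedType = TypeInfoFactory.intTypeInfo;
    ObjectInspector outputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
    Converter converter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
    System.out.println(converter.convert("2024")); // a writable int, conversion succeeds
    // An unparseable value is expected to convert to null, which checkPartitionValues
    // turns into a HiveException before the column type is altered.
    System.out.println(converter.convert("abc"));
  }
}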
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.
From the class VectorizationContext, method getStructInExpression.
private VectorExpression getStructInExpression(List<ExprNodeDesc> childExpr, ExprNodeDesc colExpr, TypeInfo colTypeInfo, List<ExprNodeDesc> inChildren, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
  VectorExpression expr;
  StructTypeInfo structTypeInfo = (StructTypeInfo) colTypeInfo;
  List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
  final int fieldCount = fieldTypeInfos.size();
  ColumnVector.Type[] fieldVectorColumnTypes = new ColumnVector.Type[fieldCount];
  InConstantType[] fieldInConstantTypes = new InConstantType[fieldCount];
  for (int f = 0; f < fieldCount; f++) {
    TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
    // Only primitive fields are supported for now.
    if (fieldTypeInfo.getCategory() != Category.PRIMITIVE) {
      return null;
    }
    // We are going to serialize using the 4 basic types.
    ColumnVector.Type fieldVectorColumnType = getColumnVectorTypeFromTypeInfo(fieldTypeInfo);
    fieldVectorColumnTypes[f] = fieldVectorColumnType;
    // We currently evaluate the IN (..) constants in special ways.
    PrimitiveCategory fieldPrimitiveCategory = ((PrimitiveTypeInfo) fieldTypeInfo).getPrimitiveCategory();
    InConstantType inConstantType = getInConstantTypeFromPrimitiveCategory(fieldPrimitiveCategory);
    fieldInConstantTypes[f] = inConstantType;
  }
  Output buffer = new Output();
  BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(fieldCount);
  final int inChildrenCount = inChildren.size();
  byte[][] serializedInChildren = new byte[inChildrenCount][];
  try {
    for (int i = 0; i < inChildrenCount; i++) {
      final ExprNodeDesc node = inChildren.get(i);
      final Object[] constants;
      if (node instanceof ExprNodeConstantDesc) {
        ExprNodeConstantDesc constNode = (ExprNodeConstantDesc) node;
        ConstantObjectInspector output = constNode.getWritableObjectInspector();
        constants = ((List<?>) output.getWritableConstantValue()).toArray();
      } else {
        ExprNodeGenericFuncDesc exprNode = (ExprNodeGenericFuncDesc) node;
        ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprNode);
        ObjectInspector output = evaluator.initialize(exprNode.getWritableObjectInspector());
        constants = (Object[]) evaluator.evaluate(null);
      }
      binarySortableSerializeWrite.set(buffer);
      for (int f = 0; f < fieldCount; f++) {
        Object constant = constants[f];
        if (constant == null) {
          binarySortableSerializeWrite.writeNull();
        } else {
          InConstantType inConstantType = fieldInConstantTypes[f];
          switch (inConstantType) {
          case STRING_FAMILY:
            {
              byte[] bytes;
              if (constant instanceof Text) {
                Text text = (Text) constant;
                bytes = text.getBytes();
                binarySortableSerializeWrite.writeString(bytes, 0, text.getLength());
              } else {
                throw new HiveException("Unexpected constant String type " + constant.getClass().getSimpleName());
              }
            }
            break;
          case INT_FAMILY:
            {
              long value;
              if (constant instanceof IntWritable) {
                value = ((IntWritable) constant).get();
              } else if (constant instanceof LongWritable) {
                value = ((LongWritable) constant).get();
              } else {
                throw new HiveException("Unexpected constant Long type " + constant.getClass().getSimpleName());
              }
              binarySortableSerializeWrite.writeLong(value);
            }
            break;
          case FLOAT_FAMILY:
            {
              double value;
              if (constant instanceof DoubleWritable) {
                value = ((DoubleWritable) constant).get();
              } else {
                throw new HiveException("Unexpected constant Double type " + constant.getClass().getSimpleName());
              }
              binarySortableSerializeWrite.writeDouble(value);
            }
            break;
          // UNDONE...
          case DATE:
          case TIMESTAMP:
          case DECIMAL:
          default:
            throw new RuntimeException("Unexpected IN constant type " + inConstantType.name());
          }
        }
      }
      serializedInChildren[i] = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
    }
  } catch (Exception e) {
    throw new HiveException(e);
  }
  // Create a single child representing the scratch column where we will
  // generate the serialized keys of the batch.
  int scratchBytesCol = ocm.allocateOutputColumn(TypeInfoFactory.stringTypeInfo);
  Class<?> cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterStructColumnInList.class : StructColumnInList.class);
  expr = createVectorExpression(cl, null, VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
  ((IStringInExpr) expr).setInListValues(serializedInChildren);
  ((IStructInExpr) expr).setScratchBytesColumn(scratchBytesCol);
  ((IStructInExpr) expr).setStructColumnExprs(this, colExpr.getChildren(), fieldVectorColumnTypes);
  return expr;
}
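The serialization step above can be exercised in isolation. A minimal standalone sketch mirroring the calls made in getStructInExpression, serializing one hypothetical (string, long) struct constant into the kind of key stored in serializedInChildren (class name and values are illustrative):

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
import org.apache.hadoop.io.Text;

public class StructInKeySketch {
  public static void main(String[] args) throws Exception {
    // Two fields per struct constant, e.g. col IN (("abc", 42), ...)
    BinarySortableSerializeWrite serializeWrite = new BinarySortableSerializeWrite(2);
    Output buffer = new Output();
    serializeWrite.set(buffer);
    Text text = new Text("abc");
    serializeWrite.writeString(text.getBytes(), 0, text.getLength());
    serializeWrite.writeLong(42L);
    // The serialized bytes play the role of one entry of serializedInChildren above,
    // which the (Filter)StructColumnInList expression compares against the scratch bytes column.
    byte[] key = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
    System.out.println(key.length + " bytes");
  }
}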