Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.
Class TestAccumuloSerDe, method testStructOfMapSerialization:
@Test
public void testStructOfMapSerialization() throws IOException, SerDeException {
  List<String> columns = Arrays.asList("row", "col");
  List<String> structColNames = Arrays.asList("map1", "map2");
  TypeInfo mapTypeInfo = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.stringTypeInfo);
  // struct<map1:map<string,string>,map2:map<string,string>>,string
  List<TypeInfo> types = Arrays.<TypeInfo>asList(
      TypeInfoFactory.getStructTypeInfo(structColNames, Arrays.asList(mapTypeInfo, mapTypeInfo)),
      TypeInfoFactory.stringTypeInfo);
  Properties tableProperties = new Properties();
  tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq");
  // Use the default separators [0, 1, 2, 3, ..., 7]
  tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
  tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
  AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(),
      tableProperties, AccumuloSerDe.class.getSimpleName());
  LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
  byte[] seps = serDeParams.getSeparators();
  // struct<map<k:v,k:v>_map<k:v,k:v>>>
  TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
  LazyStringObjectInspector stringOI =
      (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(stringTypeInfo,
          new byte[] { 0 }, 0, serDeParams.getNullSequence(), serDeParams.isEscaped(),
          serDeParams.getEscapeChar());
  LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(
      stringOI, stringOI, seps[3], seps[4], serDeParams.getNullSequence(), serDeParams.isEscaped(),
      serDeParams.getEscapeChar());
  LazySimpleStructObjectInspector rowStructOI =
      (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
          structColNames, Arrays.<ObjectInspector>asList(mapOI, mapOI), (byte) seps[2],
          serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(),
          serDeParams.isEscaped(), serDeParams.getEscapeChar());
  LazySimpleStructObjectInspector structOI =
      (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
          columns, Arrays.asList(rowStructOI, stringOI), seps[1], serDeParams.getNullSequence(),
          serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
  AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams,
      accumuloSerDeParams.getColumnMappings(), new ColumnVisibility(),
      accumuloSerDeParams.getRowIdFactory());
  Map<String, String> map1 = new HashMap<String, String>(), map2 = new HashMap<String, String>();
  map1.put("key10", "value10");
  map1.put("key11", "value11");
  map2.put("key20", "value20");
  map2.put("key21", "value21");
  ByteArrayRef byteRef = new ByteArrayRef();
  // Default separators are 1-indexed (instead of 0-indexed), thus the separator at offset 1 is
  // (byte) 2
  // The separator for the hive row is \x02, for the row Id struct, \x03, and the maps \x04 and
  // \x05
  String accumuloRow = "key10\5value10\4key11\5value11\3key20\5value20\4key21\5value21";
  LazyStruct entireStruct = (LazyStruct) LazyFactory.createLazyObject(structOI);
  byteRef.setData((accumuloRow + "\2foo").getBytes());
  entireStruct.init(byteRef, 0, byteRef.getData().length);
  Mutation m = serializer.serialize(entireStruct, structOI);
  Assert.assertArrayEquals(accumuloRow.getBytes(), m.getRow());
  Assert.assertEquals(1, m.getUpdates().size());
  ColumnUpdate update = m.getUpdates().get(0);
  Assert.assertEquals("cf", new String(update.getColumnFamily()));
  Assert.assertEquals("cq", new String(update.getColumnQualifier()));
  Assert.assertEquals("foo", new String(update.getValue()));
  AccumuloHiveRow haRow = new AccumuloHiveRow(new String(m.getRow()));
  haRow.add("cf", "cq", "foo".getBytes());
  LazyAccumuloRow lazyAccumuloRow = new LazyAccumuloRow(structOI);
  lazyAccumuloRow.init(haRow, accumuloSerDeParams.getColumnMappings(),
      accumuloSerDeParams.getRowIdFactory());
  List<Object> objects = lazyAccumuloRow.getFieldsAsList();
  Assert.assertEquals(2, objects.size());
  Assert.assertEquals("foo", objects.get(1).toString());
  LazyStruct rowStruct = (LazyStruct) objects.get(0);
  List<Object> rowObjects = rowStruct.getFieldsAsList();
  Assert.assertEquals(2, rowObjects.size());
  LazyMap rowMap = (LazyMap) rowObjects.get(0);
  Map<?, ?> actualMap = rowMap.getMap();
  System.out.println("Actual map 1: " + actualMap);
  Map<String, String> actualStringMap = new HashMap<String, String>();
  for (Entry<?, ?> entry : actualMap.entrySet()) {
    actualStringMap.put(entry.getKey().toString(), entry.getValue().toString());
  }
  Assert.assertEquals(map1, actualStringMap);
  rowMap = (LazyMap) rowObjects.get(1);
  actualMap = rowMap.getMap();
  System.out.println("Actual map 2: " + actualMap);
  actualStringMap = new HashMap<String, String>();
  for (Entry<?, ?> entry : actualMap.entrySet()) {
    actualStringMap.put(entry.getKey().toString(), entry.getValue().toString());
  }
  Assert.assertEquals(map2, actualStringMap);
}
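The essence of the test above is the nested TypeInfo composition: getStructTypeInfo receives map TypeInfos as its field types, and the resulting type name is exactly the string the test joins into LIST_COLUMN_TYPES. A minimal standalone sketch of that composition (the class name StructOfMapTypeInfoSketch is illustrative, not part of the Hive code base):

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class StructOfMapTypeInfoSketch {
  public static void main(String[] args) {
    // Nested value type: map<string,string>
    TypeInfo mapTypeInfo = TypeInfoFactory.getMapTypeInfo(
        TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
    // Row-id struct with two map fields, matching structColNames in the test.
    TypeInfo rowIdStruct = TypeInfoFactory.getStructTypeInfo(
        Arrays.asList("map1", "map2"), Arrays.asList(mapTypeInfo, mapTypeInfo));
    // Prints: struct<map1:map<string,string>,map2:map<string,string>>
    System.out.println(rowIdStruct.getTypeName());
  }
}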
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project carbondata by apache.
Class CarbonHiveRecordReader, method initialize:
public void initialize(InputSplit inputSplit, Configuration conf) throws IOException {
  // The input split can contain single HDFS block or multiple blocks, so firstly get all the
  // blocks and then set them in the query model.
  List<CarbonHiveInputSplit> splitList;
  if (inputSplit instanceof CarbonHiveInputSplit) {
    splitList = new ArrayList<>(1);
    splitList.add((CarbonHiveInputSplit) inputSplit);
  } else {
    throw new RuntimeException("unsupported input split type: " + inputSplit);
  }
  List<TableBlockInfo> tableBlockInfoList = CarbonHiveInputSplit.createBlocks(splitList);
  queryModel.setTableBlockInfos(tableBlockInfoList);
  readSupport.initialize(queryModel.getProjectionColumns(), queryModel.getAbsoluteTableIdentifier());
  try {
    carbonIterator = new ChunkRowIterator(queryExecutor.execute(queryModel));
  } catch (QueryExecutionException e) {
    throw new IOException(e.getMessage(), e.getCause());
  }
  if (valueObj == null) {
    valueObj = new ArrayWritable(Writable.class,
        new Writable[queryModel.getProjectionColumns().length]);
  }
  final TypeInfo rowTypeInfo;
  final List<String> columnNames;
  List<TypeInfo> columnTypes;
  // Get column names and sort order
  final String colIds = conf.get("hive.io.file.readcolumn.ids");
  final String columnNameProperty = conf.get("hive.io.file.readcolumn.names");
  final String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES);
  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<String>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(","));
  }
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  String[] arraySelectedColId = colIds.split(",");
  List<TypeInfo> reqColTypes = new ArrayList<TypeInfo>();
  for (String anArrayColId : arraySelectedColId) {
    reqColTypes.add(columnTypes.get(Integer.parseInt(anArrayColId)));
  }
  // Create row related objects
  rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, reqColTypes);
  this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
}
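The reader builds its row TypeInfo from only the projected column ids pushed down by Hive. Below is a condensed sketch of that pattern, assuming the same conf keys; the helper name buildProjectedRowType is illustrative and null/empty handling is left out for brevity. Note that the sketch pairs the projected names with the projected types, as the CarbonHiveSerDe example further down also does.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ProjectedRowTypeSketch {
  static StructTypeInfo buildProjectedRowType(Configuration conf) {
    // Projection pushed down by Hive, e.g. ids "0,2" out of "id,name,city" / "int,string,string".
    String colIds = conf.get("hive.io.file.readcolumn.ids");
    List<String> columnNames =
        Arrays.asList(conf.get("hive.io.file.readcolumn.names").split(","));
    List<TypeInfo> columnTypes =
        TypeInfoUtils.getTypeInfosFromTypeString(conf.get(serdeConstants.LIST_COLUMN_TYPES));

    // Keep only the projected columns, preserving the requested order.
    List<String> reqColNames = new ArrayList<String>();
    List<TypeInfo> reqColTypes = new ArrayList<TypeInfo>();
    for (String id : colIds.split(",")) {
      int i = Integer.parseInt(id);
      reqColNames.add(columnNames.get(i));
      reqColTypes.add(columnTypes.get(i));
    }
    return (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(reqColNames, reqColTypes);
  }
}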
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project carbondata by apache.
Class CarbonHiveSerDe, method initialize:
@Override
public void initialize(@Nullable Configuration configuration, Properties tbl) throws SerDeException {
  final TypeInfo rowTypeInfo;
  final List<String> columnNames;
  final List<String> reqColNames;
  final List<TypeInfo> columnTypes;
  // Get column names and sort order
  assert configuration != null;
  final String colIds = configuration.get("hive.io.file.readcolumn.ids");
  final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
  final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<String>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(","));
  }
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  if (colIds != null) {
    reqColNames = new ArrayList<String>();
    String[] arraySelectedColId = colIds.split(",");
    List<TypeInfo> reqColTypes = new ArrayList<TypeInfo>();
    for (String anArrayColId : arraySelectedColId) {
      reqColNames.add(columnNames.get(Integer.parseInt(anArrayColId)));
      reqColTypes.add(columnTypes.get(Integer.parseInt(anArrayColId)));
    }
    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(reqColNames, reqColTypes);
    this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
  } else {
    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
    // Stats part
    serializedSize = 0;
    deserializedSize = 0;
    status = LAST_OPERATION.UNKNOWN;
  }
}
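When no projection ids are present, the SerDe falls back to the full table schema taken from the table properties. A minimal sketch of that branch with made-up column values (the class name SerDeRowTypeSketch is illustrative, not part of CarbonData):

import java.util.Arrays;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class SerDeRowTypeSketch {
  public static void main(String[] args) {
    // Table properties as a SerDe receives them in initialize(); the values are hypothetical.
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name,tags");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string,map<string,string>");

    List<String> columnNames =
        Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS).split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(
        tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES));

    StructTypeInfo rowTypeInfo =
        (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    // The StructTypeInfo hands the schema on to the ObjectInspector layer.
    System.out.println(rowTypeInfo.getAllStructFieldNames());  // [id, name, tags]
    System.out.println(rowTypeInfo.getTypeName());
  }
}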
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.
Class TestKeyWrapperFactory, method setup:
@Before
public void setup() throws Exception {
  SessionState ss = new SessionState(new HiveConf());
  SessionState.setCurrentSessionState(ss);
  ArrayList<Text> col1 = new ArrayList<Text>();
  col1.add(new Text("0"));
  col1.add(new Text("1"));
  col1.add(new Text("2"));
  col1.add(new Text("3"));
  TypeInfo col1Type = TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo);
  ArrayList<Text> cola = new ArrayList<Text>();
  cola.add(new Text("a"));
  cola.add(new Text("b"));
  cola.add(new Text("c"));
  TypeInfo colaType = TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo);
  try {
    ArrayList<Object> data = new ArrayList<Object>();
    data.add(col1);
    data.add(cola);
    ArrayList<String> names = new ArrayList<String>();
    names.add("col1");
    names.add("cola");
    ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>();
    typeInfos.add(col1Type);
    typeInfos.add(colaType);
    TypeInfo dataType = TypeInfoFactory.getStructTypeInfo(names, typeInfos);
    InspectableObject r = new InspectableObject();
    ObjectInspector[] oi = new ObjectInspector[1];
    r.o = data;
    oi[0] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(dataType);
    try {
      // get a evaluator for a simple field expression
      ExprNodeDesc exprDesc = new ExprNodeColumnDesc(colaType, "cola", "", false);
      ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(exprDesc);
      ExprNodeEvaluator[] evals = new ExprNodeEvaluator[1];
      evals[0] = eval;
      ObjectInspector resultOI = eval.initialize(oi[0]);
      ObjectInspector[] resultOIs = new ObjectInspector[1];
      resultOIs[0] = resultOI;
      factory = new KeyWrapperFactory(evals, oi, resultOIs);
    } catch (Throwable e) {
      e.printStackTrace();
      throw e;
    }
  } catch (Throwable e) {
    e.printStackTrace();
    throw new RuntimeException(e);
  }
}
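The pattern the fixture relies on is getStructTypeInfo followed by getStandardWritableObjectInspectorFromTypeInfo, which yields a struct ObjectInspector over plain List rows holding writables such as Text. A self-contained sketch of that pairing (the class name StructInspectorSketch and the sample values are illustrative):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;

public class StructInspectorSketch {
  public static void main(String[] args) {
    // struct<col1:array<string>,cola:array<string>>, mirroring the test fixture.
    TypeInfo listOfString = TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo);
    TypeInfo rowType = TypeInfoFactory.getStructTypeInfo(
        Arrays.asList("col1", "cola"), Arrays.asList(listOfString, listOfString));

    // The writable object inspector views a List row whose string elements are Text.
    StandardStructObjectInspector rowOI = (StandardStructObjectInspector)
        TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowType);

    List<Object> row = new ArrayList<Object>();
    row.add(new ArrayList<Text>(Arrays.asList(new Text("0"), new Text("1"))));
    row.add(new ArrayList<Text>(Arrays.asList(new Text("a"), new Text("b"))));

    StructField colaField = rowOI.getStructFieldRef("cola");
    System.out.println(rowOI.getStructFieldData(row, colaField));  // [a, b]
  }
}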
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.
Class VectorMapOperator, method internalSetChildren:
/*
 * Create information for vector map operator.
 * The member oneRootOperator has been set.
 */
private void internalSetChildren(Configuration hconf) throws Exception {
  // The setupPartitionContextVars uses the prior read type to flush the prior deserializerBatch,
  // so set it here to none.
  currentReadType = VectorMapOperatorReadType.NONE;
  batchContext = conf.getVectorizedRowBatchCtx();
  /*
   * Use a different batch for vectorized Input File Format readers so they can do their work
   * overlapped with work of the row collection that vector/row deserialization does. This allows
   * the partitions to mix modes (e.g. for us to flush the previously batched rows on file change).
   */
  vectorizedInputFileFormatBatch = batchContext.createVectorizedRowBatch();
  conf.setVectorizedRowBatch(vectorizedInputFileFormatBatch);
  /*
   * This batch is used by vector/row deserializer readers.
   */
  deserializerBatch = batchContext.createVectorizedRowBatch();
  batchCounter = 0;
  dataColumnCount = batchContext.getDataColumnCount();
  partitionColumnCount = batchContext.getPartitionColumnCount();
  partitionValues = new Object[partitionColumnCount];
  virtualColumnCount = batchContext.getVirtualColumnCount();
  rowIdentifierColumnNum = batchContext.findVirtualColumnNum(VirtualColumn.ROWID);
  hasRowIdentifier = (rowIdentifierColumnNum != -1);
  dataColumnNums = batchContext.getDataColumnNums();
  Preconditions.checkState(dataColumnNums != null);
  // Form a truncated boolean include array for our vector/row deserializers.
  determineDataColumnsToIncludeTruncated();
  /*
   * Create table related objects
   */
  final String[] rowColumnNames = batchContext.getRowColumnNames();
  final TypeInfo[] rowColumnTypeInfos = batchContext.getRowColumnTypeInfos();
  tableStructTypeInfo = TypeInfoFactory.getStructTypeInfo(Arrays.asList(rowColumnNames),
      Arrays.asList(rowColumnTypeInfos));
  tableStandardStructObjectInspector = (StandardStructObjectInspector)
      TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(tableStructTypeInfo);
  tableRowTypeInfos = batchContext.getRowColumnTypeInfos();
  /*
   * NOTE: We do not alter the projectedColumns / projectionSize of the batches to just be
   * the included columns (+ partition columns).
   *
   * For now, we need to model the object inspector rows because there are still several
   * vectorized operators that use them.
   *
   * We need to continue to model the Object[] as having null objects for not included columns
   * until the following has been fixed:
   *   o When we have to output a STRUCT for AVG we switch to row GroupBy operators.
   *   o Some variations of VectorMapOperator, VectorReduceSinkOperator, VectorFileSinkOperator
   *     use the row super class to process rows.
   */
  /*
   * The Vectorizer class enforces that there is only one TableScanOperator, so
   * we don't need the more complicated multiple root operator mapping that MapOperator has.
   */
  fileToPartitionContextMap = new HashMap<String, VectorPartitionContext>();
  // Temporary map so we only create one partition context entry.
  HashMap<PartitionDesc, VectorPartitionContext> partitionContextMap =
      new HashMap<PartitionDesc, VectorPartitionContext>();
  for (Map.Entry<Path, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
    Path path = entry.getKey();
    PartitionDesc partDesc = conf.getPathToPartitionInfo().get(path);
    VectorPartitionContext vectorPartitionContext;
    if (!partitionContextMap.containsKey(partDesc)) {
      vectorPartitionContext = createAndInitPartitionContext(partDesc, hconf);
      partitionContextMap.put(partDesc, vectorPartitionContext);
    } else {
      vectorPartitionContext = partitionContextMap.get(partDesc);
    }
    fileToPartitionContextMap.put(path.toString(), vectorPartitionContext);
  }
  // Create list of one.
  List<Operator<? extends OperatorDesc>> children = new ArrayList<Operator<? extends OperatorDesc>>();
  children.add(oneRootOperator);
  setChildOperators(children);
}
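The table-level objects come from the same two calls seen throughout these examples: getStructTypeInfo over the schema arrays supplied by the batch context, then the standard writable struct ObjectInspector. A minimal sketch with a made-up three-column schema (the class name TableStructTypeSketch is illustrative, not part of Hive):

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TableStructTypeSketch {
  public static void main(String[] args) {
    // Row schema as parallel arrays, the shape the batch context hands back above.
    String[] rowColumnNames = { "id", "ts", "payload" };
    TypeInfo[] rowColumnTypeInfos = {
        TypeInfoFactory.longTypeInfo,
        TypeInfoFactory.timestampTypeInfo,
        TypeInfoFactory.stringTypeInfo };

    StructTypeInfo tableStructTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
        Arrays.asList(rowColumnNames), Arrays.asList(rowColumnTypeInfos));
    StandardStructObjectInspector tableOI = (StandardStructObjectInspector)
        TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(tableStructTypeInfo);

    // Prints: struct<id:bigint,ts:timestamp,payload:string>
    System.out.println(tableStructTypeInfo.getTypeName());
    System.out.println(tableOI.getAllStructFieldRefs().size());  // 3
  }
}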