Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project mongo-hadoop by mongodb.
Class BSONSerDe, method initialize.
/**
 * Reads the table's metadata, including the column names and types.
 */
@SuppressWarnings("unchecked")
@Override
public void initialize(final Configuration conf, final Properties tblProps) throws SerDeException {
    // Regex used to split the comma-separated column name list
    String splitCols = "\\s*,\\s*";
    // Get the table column names
    String colNamesStr = tblProps.getProperty(serdeConstants.LIST_COLUMNS);
    columnNames = Arrays.asList(colNamesStr.split(splitCols));
    // Get mappings specified by the user
    if (tblProps.containsKey(MONGO_COLS)) {
        String mongoFieldsStr = tblProps.getProperty(MONGO_COLS);
        Map<String, String> rules = ((BasicBSONObject) JSON.parse(mongoFieldsStr)).toMap();
        // Register the Hive field to MongoDB field mappings
        hiveToMongo = new HashMap<String, String>();
        registerMappings(rules);
    }
    // Get the table column types
    String colTypesStr = tblProps.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);
    if (columnNames.size() != columnTypes.size()) {
        throw new SerDeException("Column Names and Types don't match in size");
    }
    // Get the struct type and its object inspector
    docTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    docOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(docTypeInfo);
    // Create the BSONWritable instance for future use.
    bsonWritable = new BSONWritable();
}
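The snippet above feeds the raw value of LIST_COLUMN_TYPES straight into getTypeInfosFromTypeString. A minimal, self-contained sketch of what that call returns, assuming the colon-separated type string Hive conventionally stores in that property; the schema and class name are illustrative, not from mongo-hadoop.

import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeStringDemo {
    public static void main(String[] args) {
        // Illustrative type string in the colon-separated LIST_COLUMN_TYPES format.
        List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString("int:string:array<double>");
        for (TypeInfo t : types) {
            // Prints: int, string, array<double>
            System.out.println(t.getTypeName());
        }
    }
}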
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project phoenix by apache.
Class PhoenixSerDe, method createLazyPhoenixInspector.
private ObjectInspector createLazyPhoenixInspector(Configuration conf, Properties tbl) throws SerDeException {
    List<String> columnNameList = Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS).split(PhoenixStorageHandlerConstants.COMMA));
    List<TypeInfo> columnTypeList = TypeInfoUtils.getTypeInfosFromTypeString(tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES));
    List<ObjectInspector> columnObjectInspectors = Lists.newArrayListWithExpectedSize(columnTypeList.size());
    for (TypeInfo typeInfo : columnTypeList) {
        columnObjectInspectors.add(PhoenixObjectInspectorFactory.createObjectInspector(typeInfo, serdeParams));
    }
    return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(columnNameList, columnObjectInspectors, null, serdeParams.getSeparators()[0], serdeParams, ObjectInspectorOptions.JAVA);
}
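Phoenix pairs each parsed TypeInfo with a Phoenix-specific lazy inspector. For comparison, a hedged sketch that builds a standard (non-lazy) struct inspector from the same kind of name/type lists using only stock Hive factories; the names and type string are made up for illustration.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class StructInspectorDemo {
    public static void main(String[] args) {
        List<String> names = Arrays.asList("id", "name");
        List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString("bigint:string");
        // Fold the parallel lists into one struct type, then derive an inspector from it.
        StructTypeInfo structType = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(names, types);
        ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(structType);
        // Prints: struct<id:bigint,name:string>
        System.out.println(oi.getTypeName());
    }
}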
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project carbondata by apache.
Class CarbonHiveRecordReader, method initialize.
public void initialize(InputSplit inputSplit, Configuration conf) throws IOException {
    // The input split can contain a single HDFS block or multiple blocks, so first collect
    // all the blocks and then set them in the query model.
    List<CarbonHiveInputSplit> splitList;
    if (inputSplit instanceof CarbonHiveInputSplit) {
        splitList = new ArrayList<>(1);
        splitList.add((CarbonHiveInputSplit) inputSplit);
    } else {
        throw new RuntimeException("unsupported input split type: " + inputSplit);
    }
    List<TableBlockInfo> tableBlockInfoList = CarbonHiveInputSplit.createBlocks(splitList);
    queryModel.setTableBlockInfos(tableBlockInfoList);
    readSupport.initialize(queryModel.getProjectionColumns(), queryModel.getAbsoluteTableIdentifier());
    try {
        carbonIterator = new ChunkRowIterator(queryExecutor.execute(queryModel));
    } catch (QueryExecutionException e) {
        throw new IOException(e.getMessage(), e.getCause());
    }
    if (valueObj == null) {
        valueObj = new ArrayWritable(Writable.class, new Writable[queryModel.getProjectionColumns().length]);
    }
    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    List<TypeInfo> columnTypes;
    // Get the column names, types, and the ids of the projected columns
    final String colIds = conf.get("hive.io.file.readcolumn.ids");
    final String columnNameProperty = conf.get("hive.io.file.readcolumn.names");
    final String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES);
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    String[] arraySelectedColId = colIds.split(",");
    List<TypeInfo> reqColTypes = new ArrayList<TypeInfo>();
    for (String anArrayColId : arraySelectedColId) {
        reqColTypes.add(columnTypes.get(Integer.parseInt(anArrayColId)));
    }
    // Create the row type and its object inspector
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, reqColTypes);
    this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
}
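The projection loop above appears almost verbatim in the next snippet as well. A minimal sketch of that shared pattern as a standalone helper, selecting TypeInfos by the comma-separated index list from "hive.io.file.readcolumn.ids"; the class and method names are hypothetical, not part of CarbonData.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

public final class ColumnProjection {

    // Returns the TypeInfos whose positions are listed in colIds, e.g. "0,2,3".
    static List<TypeInfo> project(List<TypeInfo> allTypes, String colIds) {
        List<TypeInfo> selected = new ArrayList<>();
        if (colIds == null || colIds.isEmpty()) {
            return selected;
        }
        for (String id : colIds.split(",")) {
            selected.add(allTypes.get(Integer.parseInt(id.trim())));
        }
        return selected;
    }
}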
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project carbondata by apache.
Class CarbonHiveSerDe, method initialize.
@Override
public void initialize(@Nullable Configuration configuration, Properties tbl) throws SerDeException {
    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    final List<String> reqColNames;
    final List<TypeInfo> columnTypes;
    // Get the column names, types, and the ids of the projected columns
    assert configuration != null;
    final String colIds = configuration.get("hive.io.file.readcolumn.ids");
    final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (colIds != null) {
        // Only the projected subset of columns is needed
        reqColNames = new ArrayList<String>();
        String[] arraySelectedColId = colIds.split(",");
        List<TypeInfo> reqColTypes = new ArrayList<TypeInfo>();
        for (String anArrayColId : arraySelectedColId) {
            reqColNames.add(columnNames.get(Integer.parseInt(anArrayColId)));
            reqColTypes.add(columnTypes.get(Integer.parseInt(anArrayColId)));
        }
        // Create the row type and its object inspector
        rowTypeInfo = TypeInfoFactory.getStructTypeInfo(reqColNames, reqColTypes);
        this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
    } else {
        // Create the row type and its object inspector over the full schema
        rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
        this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
        // Reset the serialization stats
        serializedSize = 0;
        deserializedSize = 0;
        status = LAST_OPERATION.UNKNOWN;
    }
}
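Unlike the mongo-hadoop SerDe, this one never checks that the name and type lists line up before zipping them into a struct. A hedged sketch of that guard as a reusable precondition; the class and method names are illustrative, not from CarbonData.

import java.util.List;

import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

public final class SchemaCheck {

    // Fails fast when LIST_COLUMNS and LIST_COLUMN_TYPES disagree in length.
    static void requireAligned(List<String> names, List<TypeInfo> types) throws SerDeException {
        if (names.size() != types.size()) {
            throw new SerDeException("Column names and types don't match in size: "
                    + names.size() + " names vs " + types.size() + " types");
        }
    }
}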
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
Class TestLazyArrayMapStruct, method testLazyArray.
/**
 * Test the LazyArray class.
 */
public void testLazyArray() throws Throwable {
    try {
        // Array of Byte
        Text nullSequence = new Text("\\N");
        ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("array<tinyint>").get(0), new byte[] { (byte) 1 }, 0, nullSequence, false, (byte) 0);
        LazyArray b = (LazyArray) LazyFactory.createLazyObject(oi);
        byte[] data = new byte[] { '-', '1', 1, '\\', 'N', 1, '8' };
        TestLazyPrimitive.initLazyObject(b, data, 0, data.length);
        assertNull(b.getListElementObject(-1));
        assertEquals(new ByteWritable((byte) -1), ((LazyByte) b.getListElementObject(0)).getWritableObject());
        assertEquals(new ByteWritable((byte) -1), ((LazyByte) b.getList().get(0)).getWritableObject());
        assertNull(b.getListElementObject(1));
        assertNull(b.getList().get(1));
        assertEquals(new ByteWritable((byte) 8), ((LazyByte) b.getListElementObject(2)).getWritableObject());
        assertEquals(new ByteWritable((byte) 8), ((LazyByte) b.getList().get(2)).getWritableObject());
        assertNull(b.getListElementObject(3));
        assertEquals(3, b.getList().size());
        // Array of String
        oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("array<string>").get(0), new byte[] { (byte) '\t' }, 0, nullSequence, false, (byte) 0);
        b = (LazyArray) LazyFactory.createLazyObject(oi);
        data = new byte[] { 'a', 'b', '\t', 'c', '\t', '\\', 'N', '\t', '\t', 'd' };
        // Note: the first and last elements of the byte[] are NOT used
        TestLazyPrimitive.initLazyObject(b, data, 1, data.length - 2);
        assertNull(b.getListElementObject(-1));
        assertEquals(new Text("b"), ((LazyString) b.getListElementObject(0)).getWritableObject());
        assertEquals(new Text("b"), ((LazyString) b.getList().get(0)).getWritableObject());
        assertEquals(new Text("c"), ((LazyString) b.getListElementObject(1)).getWritableObject());
        assertEquals(new Text("c"), ((LazyString) b.getList().get(1)).getWritableObject());
        assertNull(b.getListElementObject(2));
        assertNull(b.getList().get(2));
        assertEquals(new Text(""), ((LazyString) b.getListElementObject(3)).getWritableObject());
        assertEquals(new Text(""), ((LazyString) b.getList().get(3)).getWritableObject());
        assertEquals(new Text(""), ((LazyString) b.getListElementObject(4)).getWritableObject());
        assertEquals(new Text(""), ((LazyString) b.getList().get(4)).getWritableObject());
        assertNull(b.getListElementObject(5));
        assertEquals(5, b.getList().size());
        // -- HIVE-4149
        b = (LazyArray) LazyFactory.createLazyObject(oi);
        data = new byte[] { 'a', '\t', '\\', 'N' };
        TestLazyPrimitive.initLazyObject(b, data, 0, data.length);
        assertEquals(new Text("a"), ((LazyString) b.getListElementObject(0)).getWritableObject());
        assertNull(b.getListElementObject(1));
        data = new byte[] { '\\', 'N', '\t', 'a' };
        TestLazyPrimitive.initLazyObject(b, data, 0, data.length);
        assertNull(b.getListElementObject(0));
        // Fetch twice: the second call returns the value from the uncleaned cache
        assertNull(b.getListElementObject(0));
        assertEquals(new Text("a"), ((LazyString) b.getListElementObject(1)).getWritableObject());
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
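The test parses single-type strings such as "array<tinyint>" and takes element 0 of the result. The same pattern works for any nested type; a short, self-contained sketch inspecting a parsed map type, with an illustrative type string.

import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class NestedTypeDemo {
    public static void main(String[] args) {
        // A type string holding exactly one type parses to a one-element list.
        TypeInfo t = TypeInfoUtils.getTypeInfosFromTypeString("map<string,int>").get(0);
        MapTypeInfo mapType = (MapTypeInfo) t;
        // Prints: string, then int
        System.out.println(mapType.getMapKeyTypeInfo().getTypeName());
        System.out.println(mapType.getMapValueTypeInfo().getTypeName());
    }
}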