use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
the class LazySerDeParameters method extractColumnInfo.
/**
* Extracts and set column names and column types from the table properties
* @throws SerDeException
*/
public void extractColumnInfo(Configuration conf) throws SerDeException {
// Read the configuration parameters
String columnNameProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMNS);
// NOTE: if "columns.types" is missing, all columns will be of String type
String columnTypeProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
// Parse the configuration parameters
String columnNameDelimiter = tableProperties.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tableProperties.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
if (columnNameProperty != null && columnNameProperty.length() > 0) {
columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
} else {
columnNames = new ArrayList<String>();
}
if (columnTypeProperty == null) {
// Default type: all string
StringBuilder sb = new StringBuilder();
for (int i = 0; i < columnNames.size(); i++) {
if (i > 0) {
sb.append(":");
}
sb.append(serdeConstants.STRING_TYPE_NAME);
}
columnTypeProperty = sb.toString();
}
columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
// Insert time-zone for timestamp type
if (conf != null) {
final TimestampLocalTZTypeInfo tsTZTypeInfo = new TimestampLocalTZTypeInfo(conf.get(ConfVars.HIVE_LOCAL_TIME_ZONE.varname));
for (int i = 0; i < columnTypes.size(); i++) {
if (columnTypes.get(i) instanceof TimestampLocalTZTypeInfo) {
columnTypes.set(i, tsTZTypeInfo);
}
}
}
if (columnNames.size() != columnTypes.size()) {
throw new SerDeException(serdeName + ": columns has " + columnNames.size() + " elements while columns.types has " + columnTypes.size() + " elements!");
}
}
use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
the class TestLazyHBaseCellMap method testInitColumnPrefix.
@Test
public void testInitColumnPrefix() throws Exception {
Text nullSequence = new Text("\\N");
ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0), new byte[] { (byte) 1, (byte) 2 }, 0, nullSequence, false, (byte) 0);
LazyHBaseCellMap b = new LazyHBaseCellMap((LazyMapObjectInspector) oi);
// Initialize a result
Cell[] cells = new KeyValue[2];
final String col1 = "1";
final String col2 = "2";
cells[0] = new KeyValue(TEST_ROW, COLUMN_FAMILY, Bytes.toBytes(QUAL_PREFIX + col1), Bytes.toBytes("cfacol1"));
cells[1] = new KeyValue(TEST_ROW, COLUMN_FAMILY, Bytes.toBytes(QUAL_PREFIX + col2), Bytes.toBytes("cfacol2"));
Result r = Result.create(cells);
List<Boolean> mapBinaryStorage = new ArrayList<Boolean>();
mapBinaryStorage.add(false);
mapBinaryStorage.add(false);
b.init(r, COLUMN_FAMILY, mapBinaryStorage, Bytes.toBytes(QUAL_PREFIX), true);
assertNotNull(b.getMapValueElement(new Text(col1)));
assertNotNull(b.getMapValueElement(new Text(col2)));
}
use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
the class TestLazyHBaseObject method testLazyHBaseCellMap2.
/**
* Test the LazyMap class with String-to-String.
* @throws SerDeException
*/
public void testLazyHBaseCellMap2() throws SerDeException {
// Map of String to String
Text nullSequence = new Text("\\N");
ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0), new byte[] { (byte) '#', (byte) '\t' }, 0, nullSequence, false, (byte) 0);
LazyHBaseCellMap b = new LazyHBaseCellMap((LazyMapObjectInspector) oi);
// Initialize a result
List<Cell> kvs = new ArrayList<Cell>();
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("col1"), Bytes.toBytes("cfacol1")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("col2"), Bytes.toBytes("cfacol2")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("2"), Bytes.toBytes("d\tf")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("-1"), Bytes.toBytes("")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("0"), Bytes.toBytes("0")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("8"), Bytes.toBytes("abc")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfc"), Bytes.toBytes("col3"), Bytes.toBytes("cfccol3")));
Result r = Result.create(kvs);
List<Boolean> mapBinaryStorage = new ArrayList<Boolean>();
mapBinaryStorage.add(false);
mapBinaryStorage.add(false);
b.init(r, "cfb".getBytes(), mapBinaryStorage);
assertEquals(new Text("d\tf"), ((LazyString) b.getMapValueElement(new Text("2"))).getWritableObject());
assertNull(b.getMapValueElement(new Text("-1")));
assertEquals(new Text("0"), ((LazyString) b.getMapValueElement(new Text("0"))).getWritableObject());
assertEquals(new Text("abc"), ((LazyString) b.getMapValueElement(new Text("8"))).getWritableObject());
assertNull(b.getMapValueElement(new Text("-")));
assertEquals("{'0':'0','2':'d\\tf','8':'abc'}".replace('\'', '\"'), SerDeUtils.getJSONString(b, oi));
}
use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
the class TestLazyHBaseObject method testLazyHBaseRow1.
/**
* Test the LazyHBaseRow class with one-for-one mappings between
* Hive fields and HBase columns.
* @throws SerDeException
*/
public void testLazyHBaseRow1() throws SerDeException {
List<TypeInfo> fieldTypeInfos = TypeInfoUtils.getTypeInfosFromTypeString("string,int,array<string>,map<string,string>,string");
List<String> fieldNames = Arrays.asList("key", "a", "b", "c", "d");
Text nullSequence = new Text("\\N");
String hbaseColsMapping = ":key,cfa:a,cfa:b,cfb:c,cfb:d";
ColumnMappings columnMappings = null;
try {
columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColsMapping);
} catch (SerDeException e) {
fail(e.toString());
}
for (ColumnMapping colMap : columnMappings) {
if (!colMap.hbaseRowKey && colMap.qualifierName == null) {
colMap.binaryStorage.add(false);
colMap.binaryStorage.add(false);
} else {
colMap.binaryStorage.add(false);
}
}
ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames, fieldTypeInfos, new byte[] { ' ', ':', '=' }, nullSequence, false, false, (byte) 0);
LazyHBaseRow o = new LazyHBaseRow((LazySimpleStructObjectInspector) oi, columnMappings);
List<Cell> kvs = new ArrayList<Cell>();
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("a"), Bytes.toBytes("123")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("b"), Bytes.toBytes("a:b:c")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("c"), Bytes.toBytes("d=e:f=g")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("d"), Bytes.toBytes("hi")));
Result r = Result.create(kvs);
o.init(r);
assertEquals(("{'key':'test-row','a':123,'b':['a','b','c']," + "'c':{'d':'e','f':'g'},'d':'hi'}").replace("'", "\""), SerDeUtils.getJSONString(o, oi));
kvs.clear();
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("a"), Bytes.toBytes("123")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("c"), Bytes.toBytes("d=e:f=g")));
r = Result.create(kvs);
o.init(r);
assertEquals(("{'key':'test-row','a':123,'b':null," + "'c':{'d':'e','f':'g'},'d':null}").replace("'", "\""), SerDeUtils.getJSONString(o, oi));
kvs.clear();
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("b"), Bytes.toBytes("a")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("c"), Bytes.toBytes("d=\\N:f=g:h")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("d"), Bytes.toBytes("no")));
r = Result.create(kvs);
o.init(r);
assertEquals(("{'key':'test-row','a':null,'b':['a']," + "'c':{'d':null,'f':'g','h':null},'d':'no'}").replace("'", "\""), SerDeUtils.getJSONString(o, oi));
kvs.clear();
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("b"), Bytes.toBytes(":a::")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("d"), Bytes.toBytes("no")));
r = Result.create(kvs);
o.init(r);
assertEquals(("{'key':'test-row','a':null,'b':['','a','','']," + "'c':null,'d':'no'}").replace("'", "\""), SerDeUtils.getJSONString(o, oi));
// This is intentionally duplicated because of HIVE-3179
assertEquals(("{'key':'test-row','a':null,'b':['','a','','']," + "'c':null,'d':'no'}").replace("'", "\""), SerDeUtils.getJSONString(o, oi));
kvs.clear();
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("a"), Bytes.toBytes("123")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("b"), Bytes.toBytes("")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("c"), Bytes.toBytes("")));
kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("d"), Bytes.toBytes("")));
r = Result.create(kvs);
o.init(r);
assertEquals("{'key':'test-row','a':123,'b':[],'c':{},'d':''}".replace("'", "\""), SerDeUtils.getJSONString(o, oi));
}
use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
the class TestLazyHBaseObject method testLazyHBaseRow3.
/**
* Test the LazyHBaseRow class with a one-to-one/onto mapping between Hive columns and
* HBase column family/column qualifier pairs. The column types are primitive and fields
* are stored in binary format in HBase.
* @throws SerDeException
*/
public void testLazyHBaseRow3() throws SerDeException {
List<TypeInfo> fieldTypeInfos = TypeInfoUtils.getTypeInfosFromTypeString("string,int,tinyint,smallint,bigint,float,double,string,boolean");
List<String> fieldNames = Arrays.asList(new String[] { "key", "c_int", "c_byte", "c_short", "c_long", "c_float", "c_double", "c_string", "c_bool" });
Text nullSequence = new Text("\\N");
String hbaseColumnsMapping = ":key#str,cf-int:cq-int#bin,cf-byte:cq-byte#bin," + "cf-short:cq-short#bin,cf-long:cq-long#bin,cf-float:cq-float#bin,cf-double:cq-double#bin," + "cf-string:cq-string#str,cf-bool:cq-bool#bin";
ColumnMappings columnMappings = null;
try {
columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping);
} catch (SerDeException e) {
fail(e.toString());
}
ColumnMapping[] columnsMapping = columnMappings.getColumnsMapping();
for (int i = 0; i < columnsMapping.length; i++) {
ColumnMapping colMap = columnsMapping[i];
if (i == 0 || i == 7) {
colMap.binaryStorage.add(false);
} else {
colMap.binaryStorage.add(true);
}
}
ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames, fieldTypeInfos, new byte[] { ' ', ':', '=' }, nullSequence, false, false, (byte) 0);
LazyHBaseRow o = new LazyHBaseRow((LazySimpleStructObjectInspector) oi, columnMappings);
byte[] rowKey = "row-key".getBytes();
List<Cell> kvs = new ArrayList<Cell>();
byte[] value;
for (int i = 1; i < columnsMapping.length; i++) {
switch(i) {
case 1:
value = Bytes.toBytes(1);
break;
case 2:
value = new byte[] { (byte) 1 };
break;
case 3:
value = Bytes.toBytes((short) 1);
break;
case 4:
value = Bytes.toBytes((long) 1);
break;
case 5:
value = Bytes.toBytes((float) 1.0F);
break;
case 6:
value = Bytes.toBytes((double) 1.0);
break;
case 7:
value = "Hadoop, Hive, with HBase storage handler.".getBytes();
break;
case 8:
value = Bytes.toBytes(true);
break;
default:
throw new RuntimeException("Not expected: " + i);
}
ColumnMapping colMap = columnsMapping[i];
kvs.add(new KeyValue(rowKey, colMap.familyNameBytes, colMap.qualifierNameBytes, value));
}
Collections.sort(kvs, KeyValue.COMPARATOR);
Result result = Result.create(kvs);
o.init(result);
List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
for (int i = 0; i < fieldRefs.size(); i++) {
Object fieldData = ((StructObjectInspector) oi).getStructFieldData(o, fieldRefs.get(i));
assert (fieldData != null);
assert (fieldData instanceof LazyPrimitive<?, ?>);
Writable writable = ((LazyPrimitive<?, ?>) fieldData).getWritableObject();
switch(i) {
case 0:
Text text = new Text("row-key");
assertEquals(text, writable);
break;
case 1:
IntWritable iw = new IntWritable(1);
assertEquals(iw, writable);
break;
case 2:
ByteWritable bw = new ByteWritable((byte) 1);
assertEquals(bw, writable);
break;
case 3:
ShortWritable sw = new ShortWritable((short) 1);
assertEquals(sw, writable);
break;
case 4:
LongWritable lw = new LongWritable(1);
assertEquals(lw, writable);
break;
case 5:
FloatWritable fw = new FloatWritable(1.0F);
assertEquals(fw, writable);
break;
case 6:
DoubleWritable dw = new DoubleWritable(1.0);
assertEquals(dw, writable);
break;
case 7:
Text t = new Text("Hadoop, Hive, with HBase storage handler.");
assertEquals(t, writable);
break;
case 8:
BooleanWritable boolWritable = new BooleanWritable(true);
assertEquals(boolWritable, writable);
break;
default:
fail("Error: Unanticipated value in deserializing fields for HBaseSerDe.");
break;
}
}
}
Aggregations