Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
The class RegexSerDe, method initialize.
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  // We can get the table definition from tbl.
  // Read the configuration parameters
  inputRegex = tbl.getProperty(INPUT_REGEX);
  outputFormatString = tbl.getProperty(OUTPUT_FORMAT_STRING);
  String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
  String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  boolean inputRegexIgnoreCase =
      "true".equalsIgnoreCase(tbl.getProperty(INPUT_REGEX_CASE_SENSITIVE));
  // Parse the configuration parameters
  if (inputRegex != null) {
    inputPattern = Pattern.compile(inputRegex,
        Pattern.DOTALL + (inputRegexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0));
  } else {
    inputPattern = null;
  }
  final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
      ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER)
      : String.valueOf(SerDeUtils.COMMA);
  List<String> columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
  List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  assert columnNames.size() == columnTypes.size();
  numColumns = columnNames.size();
  // All columns have to be of type STRING.
  for (int c = 0; c < numColumns; c++) {
    if (!columnTypes.get(c).equals(TypeInfoFactory.stringTypeInfo)) {
      throw new SerDeException(getClass().getName() + " only accepts string columns, but column["
          + c + "] named " + columnNames.get(c) + " has type " + columnTypes.get(c));
    }
  }
  // Constructing the row ObjectInspector:
  // The row consists of some string columns, each column will be a java
  // String object.
  List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
  for (int c = 0; c < numColumns; c++) {
    columnOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
  }
  // StandardStruct uses ArrayList to store the row.
  rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
  // Constructing the row object, etc, which will be reused for all rows.
  row = new ArrayList<String>(numColumns);
  for (int c = 0; c < numColumns; c++) {
    row.add(null);
  }
  outputFields = new Object[numColumns];
  outputRowText = new Text();
}
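As a quick illustration of what getTypeInfosFromTypeString consumes here: the columns.types property is a colon-delimited type string, and each entry is parsed into a TypeInfo. The following is a minimal, hypothetical sketch; the column types are chosen for illustration and are not taken from the snippet above.

import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeStringSketch {
  public static void main(String[] args) {
    // Parse a colon-delimited type string such as Hive stores in columns.types.
    List<TypeInfo> types =
        TypeInfoUtils.getTypeInfosFromTypeString("string:string:map<string,string>");
    for (TypeInfo t : types) {
      // Prints e.g. "string (PRIMITIVE)" and "map<string,string> (MAP)".
      System.out.println(t.getTypeName() + " (" + t.getCategory() + ")");
    }
  }
}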
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
The class TypedBytesSerDe, method initialize.
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  // We can get the table definition from tbl.
  serializeBytesWritable = new BytesWritable();
  barrStr = new NonSyncDataOutputBuffer();
  tbOut = new TypedBytesWritableOutput(barrStr);
  inBarrStr = new NonSyncDataInputBuffer();
  tbIn = new TypedBytesWritableInput(inBarrStr);
  // Read the configuration parameters
  String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
  String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
      ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER)
      : String.valueOf(SerDeUtils.COMMA);
  columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
  columnTypes = null;
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  assert columnNames.size() == columnTypes.size();
  numColumns = columnNames.size();
  // All columns have to be primitive.
  for (int c = 0; c < numColumns; c++) {
    if (columnTypes.get(c).getCategory() != Category.PRIMITIVE) {
      throw new SerDeException(getClass().getName() + " only accepts primitive columns, but column["
          + c + "] named " + columnNames.get(c) + " has category " + columnTypes.get(c).getCategory());
    }
  }
  // Constructing the row ObjectInspector:
  // The row consists of primitive columns; each column gets the standard writable
  // ObjectInspector derived from its TypeInfo.
  List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
  for (int c = 0; c < numColumns; c++) {
    columnOIs.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(columnTypes.get(c)));
  }
  // StandardStruct uses ArrayList to store the row.
  rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
  // Constructing the row object, etc, which will be reused for all rows.
  row = new ArrayList<Object>(numColumns);
  for (int c = 0; c < numColumns; c++) {
    row.add(null);
  }
}
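For context, here is a hedged sketch of the table properties this initialize() reads. The property keys are the serdeConstants used above; the column names and types, and the direct construction of the SerDe, are assumptions made for illustration only.

// Hypothetical setup, assuming the hive-serde and hive-contrib jars are on the classpath.
Properties tbl = new Properties();
tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,amount,flag");          // key "columns"
tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:double:boolean"); // key "columns.types"
TypedBytesSerDe serDe = new TypedBytesSerDe();
serDe.initialize(new Configuration(), tbl);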
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
The class LazyHBaseCellMapTest, method testInitColumnPrefix.
public void testInitColumnPrefix() throws Exception {
  Text nullSequence = new Text("\\N");
  ObjectInspector oi = LazyFactory.createLazyObjectInspector(
      TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0),
      new byte[] { (byte) 1, (byte) 2 }, 0, nullSequence, false, (byte) 0);
  LazyHBaseCellMap b = new LazyHBaseCellMap((LazyMapObjectInspector) oi);
  // Initialize a result
  Cell[] cells = new KeyValue[2];
  final String col1 = "1";
  final String col2 = "2";
  cells[0] = new KeyValue(TEST_ROW, COLUMN_FAMILY, Bytes.toBytes(QUAL_PREFIX + col1), Bytes.toBytes("cfacol1"));
  cells[1] = new KeyValue(TEST_ROW, COLUMN_FAMILY, Bytes.toBytes(QUAL_PREFIX + col2), Bytes.toBytes("cfacol2"));
  Result r = Result.create(cells);
  List<Boolean> mapBinaryStorage = new ArrayList<Boolean>();
  mapBinaryStorage.add(false);
  mapBinaryStorage.add(false);
  b.init(r, COLUMN_FAMILY, mapBinaryStorage, Bytes.toBytes(QUAL_PREFIX), true);
  assertNotNull(b.getMapValueElement(new Text(col1)));
  assertNotNull(b.getMapValueElement(new Text(col2)));
}
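The .get(0) call above works because the type string contains a single entry. A minimal sketch of that parse, with the MapTypeInfo accessors shown for clarity (not part of the original test):

MapTypeInfo mapType = (MapTypeInfo)
    TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0);
// Both key and value type infos are string for this type string.
System.out.println(mapType.getMapKeyTypeInfo());
System.out.println(mapType.getMapValueTypeInfo());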
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
The class ColumnarStorageBench, method createRandomRow.
private Object createRandomRow(final String columnTypes) throws SerDeException {
  Writable recordWritable = createRecord(TypeInfoUtils.getTypeInfosFromTypeString(columnTypes));
  Writable simpleWritable = lazySimpleSerDe.serialize(recordWritable, getArrayWritableObjectInspector(columnTypes));
  return lazySimpleSerDe.deserialize(simpleWritable);
}
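A hypothetical call site, only to show that the columnTypes argument is the same colon-delimited type string that getTypeInfosFromTypeString expects; the schema below is invented, the actual benchmark drives this with its own type strings.

// Illustrative only.
Object row = createRandomRow("int:string:array<bigint>");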
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.
The class TestLazyAccumuloMap, method testBinaryIntMap.
@Test
public void testBinaryIntMap() throws SerDeException, IOException {
  AccumuloHiveRow row = new AccumuloHiveRow("row");
  row.add(new Text("cf1"), new Text(toBytes(1)), toBytes(2));
  row.add(new Text("cf1"), new Text(toBytes(2)), toBytes(4));
  row.add(new Text("cf1"), new Text(toBytes(3)), toBytes(6));
  HiveAccumuloMapColumnMapping mapping = new HiveAccumuloMapColumnMapping(
      "cf1", null, ColumnEncoding.BINARY, ColumnEncoding.BINARY, "column",
      TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo).toString());
  // Map of Integer to Integer
  Text nullSequence = new Text("\\N");
  ObjectInspector oi = LazyFactory.createLazyObjectInspector(
      TypeInfoUtils.getTypeInfosFromTypeString("map<int,int>").get(0),
      new byte[] { (byte) 1, (byte) 2 }, 0, nullSequence, false, (byte) 0);
  LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi);
  map.init(row, mapping);
  Assert.assertEquals(3, map.getMapSize());
  Object o = map.getMapValueElement(new IntWritable(1));
  Assert.assertNotNull(o);
  Assert.assertEquals(new IntWritable(2), ((LazyInteger) o).getWritableObject());
  o = map.getMapValueElement(new IntWritable(2));
  Assert.assertNotNull(o);
  Assert.assertEquals(new IntWritable(4), ((LazyInteger) o).getWritableObject());
  o = map.getMapValueElement(new IntWritable(3));
  Assert.assertNotNull(o);
  Assert.assertEquals(new IntWritable(6), ((LazyInteger) o).getWritableObject());
}
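One detail worth noting, sketched below: the column mapping is built from TypeInfoFactory.getMapTypeInfo(...).toString(), while the ObjectInspector is built from the literal "map<int,int>". The two stay consistent because a TypeInfo's toString() is valid input to getTypeInfosFromTypeString. This round-trip sketch is illustrative and not part of the test.

TypeInfo built = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
TypeInfo parsed = TypeInfoUtils.getTypeInfosFromTypeString(built.toString()).get(0);
// Both represent map<int,int>, so they compare equal.
System.out.println(built.equals(parsed)); // true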