Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache: class TestLazyHBaseObject, method testLazyHBaseCellMap1.
/**
 * Test the LazyHBaseCellMap class with an Integer-to-String map.
 * @throws SerDeException
 */
public void testLazyHBaseCellMap1() throws SerDeException {
  // Map of Integer to String
  Text nullSequence = new Text("\\N");
  ObjectInspector oi = LazyFactory.createLazyObjectInspector(
      TypeInfoUtils.getTypeInfosFromTypeString("map<int,string>").get(0),
      new byte[] { (byte) 1, (byte) 2 }, 0, nullSequence, false, (byte) 0);
  LazyHBaseCellMap b = new LazyHBaseCellMap((LazyMapObjectInspector) oi);
  // Initialize a result
  List<Cell> kvs = new ArrayList<Cell>();
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"),
      Bytes.toBytes("col1"), Bytes.toBytes("cfacol1")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"),
      Bytes.toBytes("col2"), Bytes.toBytes("cfacol2")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"),
      Bytes.toBytes("2"), Bytes.toBytes("def")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"),
      Bytes.toBytes("-1"), Bytes.toBytes("")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"),
      Bytes.toBytes("0"), Bytes.toBytes("0")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"),
      Bytes.toBytes("8"), Bytes.toBytes("abc")));
  kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfc"),
      Bytes.toBytes("col3"), Bytes.toBytes("cfccol3")));
  Result r = Result.create(kvs);
  List<Boolean> mapBinaryStorage = new ArrayList<Boolean>();
  mapBinaryStorage.add(false);
  mapBinaryStorage.add(false);
  b.init(r, "cfb".getBytes(), mapBinaryStorage);
  assertEquals(new Text("def"),
      ((LazyString) b.getMapValueElement(new IntWritable(2))).getWritableObject());
  // The zero-length cell value for key -1 does not produce a map entry.
  assertNull(b.getMapValueElement(new IntWritable(-1)));
  assertEquals(new Text("0"),
      ((LazyString) b.getMapValueElement(new IntWritable(0))).getWritableObject());
  assertEquals(new Text("abc"),
      ((LazyString) b.getMapValueElement(new IntWritable(8))).getWritableObject());
  assertNull(b.getMapValueElement(new IntWritable(12345)));
  assertEquals("{0:'0',2:'def',8:'abc'}".replace('\'', '\"'), SerDeUtils.getJSONString(b, oi));
}
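For reference, here is a minimal standalone sketch (not part of the Hive test suite; the class name MapTypeInfoDemo is invented) of what the getTypeInfosFromTypeString call above produces for the type string "map<int,string>":

import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class MapTypeInfoDemo {
  public static void main(String[] args) {
    // The type string is comma-separated at the top level; here it contains one type.
    List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString("map<int,string>");
    // A map type parses to a MapTypeInfo carrying key and value TypeInfos.
    MapTypeInfo mapType = (MapTypeInfo) types.get(0);
    System.out.println(mapType.getMapKeyTypeInfo().getTypeName());   // int
    System.out.println(mapType.getMapValueTypeInfo().getTypeName()); // string
  }
}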
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache: class JsonSerDe, method initialize.
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  List<TypeInfo> columnTypes;
  StructTypeInfo rowTypeInfo;
  LOG.debug("Initializing JsonSerDe: {}", tbl.entrySet());
  // Get column names and types
  String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
  String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
      ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER)
      : String.valueOf(SerDeUtils.COMMA);
  // all table column names
  if (columnNameProperty.isEmpty()) {
    columnNames = Collections.emptyList();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
  }
  // all column types
  if (columnTypeProperty.isEmpty()) {
    columnTypes = Collections.emptyList();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  LOG.debug("columns: {}, {}", columnNameProperty, columnNames);
  LOG.debug("types: {}, {}", columnTypeProperty, columnTypes);
  assert (columnNames.size() == columnTypes.size());
  rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
  cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);
  try {
    schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema();
    LOG.debug("schema : {}", schema);
    LOG.debug("fields : {}", schema.getFieldNames());
  } catch (HCatException e) {
    throw new SerDeException(e);
  }
  jsonFactory = new JsonFactory();
  tsParser = new TimestampParser(
      HiveStringUtils.splitAndUnEscape(tbl.getProperty(serdeConstants.TIMESTAMP_FORMATS)));
}
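The reason initialize parses the types with getTypeInfosFromTypeString instead of splitting the property on commas is that type strings can contain nested commas. A hedged sketch of the same name/type assembly (the column names and types here are illustrative, not from any Hive table):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class RowTypeInfoDemo {
  public static void main(String[] args) {
    List<String> columnNames = Arrays.asList("id", "name", "attrs");
    // The comma inside map<string,int> would break a naive split(","), but the
    // parser tracks angle brackets and returns exactly three TypeInfos.
    List<TypeInfo> columnTypes =
        TypeInfoUtils.getTypeInfosFromTypeString("int,string,map<string,int>");
    StructTypeInfo rowTypeInfo =
        (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    System.out.println(rowTypeInfo.getTypeName());
    // struct<id:int,name:string,attrs:map<string,int>>
  }
}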
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache: class AvroSerdeUtils, method determineSchemaOrThrowException.
/**
 * Determine the schema that's been provided for Avro serde work.
 * @param conf the job configuration, used to propagate the resolved schema
 * @param properties containing a key pointing to the schema, one way or another
 * @return schema to use while serializing and deserializing the Avro file
 * @throws IOException if an error occurs while trying to read the schema from another location
 * @throws AvroSerdeException if unable to find a schema or a pointer to it in the properties
 */
public static Schema determineSchemaOrThrowException(Configuration conf, Properties properties)
    throws IOException, AvroSerdeException {
  String schemaString = properties.getProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName());
  if (schemaString != null && !schemaString.equals(SCHEMA_NONE)) {
    return AvroSerdeUtils.getSchemaFor(schemaString);
  }
  // Try pulling directly from URL
  schemaString = properties.getProperty(AvroTableProperties.SCHEMA_URL.getPropName());
  if (schemaString == null) {
    // No literal and no URL: derive the schema from the table's column names and types.
    final String columnNameProperty = properties.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    final String columnCommentProperty = properties.getProperty(AvroSerDe.LIST_COLUMN_COMMENTS);
    if (columnNameProperty == null || columnNameProperty.isEmpty()
        || columnTypeProperty == null || columnTypeProperty.isEmpty()) {
      throw new AvroSerdeException(EXCEPTION_MESSAGE);
    }
    final String columnNameDelimiter = properties.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
        ? properties.getProperty(serdeConstants.COLUMN_NAME_DELIMITER)
        : String.valueOf(SerDeUtils.COMMA);
    // Get column names and types
    List<String> columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    Schema schema = AvroSerDe.getSchemaFromCols(properties, columnNames, columnTypes, columnCommentProperty);
    properties.setProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName(), schema.toString());
    if (conf != null) {
      conf.set(AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName(), schema.toString(false));
    }
    return schema;
  } else if (schemaString.equals(SCHEMA_NONE)) {
    throw new AvroSerdeException(EXCEPTION_MESSAGE);
  }
  try {
    Schema s = getSchemaFromFS(schemaString, conf);
    if (s == null) {
      // in case the schema is not on a file system, treat the string as a plain URL
      return AvroSerdeUtils.getSchemaFor(new URL(schemaString));
    }
    return s;
  } catch (IOException ioe) {
    throw new AvroSerdeException("Unable to read schema from given path: " + schemaString, ioe);
  } catch (URISyntaxException urie) {
    throw new AvroSerdeException("Unable to read schema from given path: " + schemaString, urie);
  }
}
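The method resolves the schema in order: explicit literal, then URL (file system or plain URL), then derivation from the table's columns. A minimal sketch of the first branch (the record schema below is invented for illustration; the AvroTableProperties import path is assumed from the code above):

import java.util.Properties;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties;

public class AvroSchemaLiteralDemo {
  public static void main(String[] args) throws Exception {
    Properties props = new Properties();
    // An explicit schema literal short-circuits the URL and column-derived branches.
    props.setProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName(),
        "{\"type\":\"record\",\"name\":\"r\",\"fields\":"
            + "[{\"name\":\"id\",\"type\":\"int\"}]}");
    Schema schema =
        AvroSerdeUtils.determineSchemaOrThrowException(new Configuration(), props);
    System.out.println(schema.getField("id").schema()); // "int"
  }
}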
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache: class ThriftJDBCBinarySerDe, method initialize.
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  // Get column names
  MAX_BUFFERED_ROWS = HiveConf.getIntVar(conf,
      HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_DEFAULT_FETCH_SIZE);
  LOG.info("ThriftJDBCBinarySerDe max number of buffered rows: " + MAX_BUFFERED_ROWS);
  String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
  String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
      ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER)
      : String.valueOf(SerDeUtils.COMMA);
  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<String>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
  }
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
  rowObjectInspector =
      (StructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo);
  initializeRowAndColumns();
  try {
    thriftFormatter.initialize(conf, tbl);
  } catch (Exception e) {
    throw new SerDeException(e);
  }
}
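The same delimiter lookup appears in each initialize method above: column names may themselves contain commas, so Hive can record an alternative separator under COLUMN_NAME_DELIMITER. A small sketch of that lookup in isolation (the names "a,b" and "c,d" are invented):

import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeUtils;

public class ColumnDelimiterDemo {
  public static void main(String[] args) {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "a,b;c,d");
    tbl.setProperty(serdeConstants.COLUMN_NAME_DELIMITER, ";");
    // Fall back to a comma when no explicit delimiter was recorded.
    String delimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
        ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER)
        : String.valueOf(SerDeUtils.COMMA);
    List<String> columnNames =
        Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS).split(delimiter));
    System.out.println(columnNames); // [a,b, c,d] -- two names, each containing a comma
  }
}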
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache: class LazyBinarySerDe, method initialize.
/**
* Initialize the SerDe with configuration and table information.
*/
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
  // Get column names and types
  String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
  String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
      ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER)
      : String.valueOf(SerDeUtils.COMMA);
  String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  if (columnNameProperty.length() == 0) {
    columnNames = new ArrayList<String>();
  } else {
    columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
  }
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  assert (columnNames.size() == columnTypes.size());
  // Create row related objects
  rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
  // Create the object inspector and the lazy binary struct object
  cachedObjectInspector = LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(rowTypeInfo);
  cachedLazyBinaryStruct =
      (LazyBinaryStruct) LazyBinaryFactory.createLazyBinaryObject(cachedObjectInspector);
  // output debug info
  LOG.debug("LazyBinarySerDe initialized with: columnNames=" + columnNames
      + " columnTypes=" + columnTypes);
  serializedSize = 0;
  stats = new SerDeStats();
  lastOperationSerialize = false;
  lastOperationDeserialize = false;
}
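What makes storing LIST_COLUMN_TYPES as a flat string workable is that parsing and printing round-trip for canonical type strings: getTypeInfosFromTypeString followed by getTypeName reproduces the input. A sketch of that assumed property (the nested struct type below is invented):

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeStringRoundTripDemo {
  public static void main(String[] args) {
    String typeString = "int,struct<a:bigint,b:map<string,double>>";
    // Re-render each parsed TypeInfo and rejoin with commas.
    StringBuilder rendered = new StringBuilder();
    for (TypeInfo t : TypeInfoUtils.getTypeInfosFromTypeString(typeString)) {
      if (rendered.length() > 0) {
        rendered.append(',');
      }
      rendered.append(t.getTypeName());
    }
    System.out.println(typeString.equals(rendered.toString())); // true
  }
}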