Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project Hive by Apache.
The class TestDeserializer, method testSchemaDeserialize.
@Test
public void testSchemaDeserialize() {
  StandardStructObjectInspector schemaObjectInspector =
      ObjectInspectorFactory.getStandardStructObjectInspector(
          Arrays.asList("0:col1", "1:col2"),
          Arrays.asList(
              PrimitiveObjectInspectorFactory.writableLongObjectInspector,
              PrimitiveObjectInspectorFactory.writableStringObjectInspector));
  Deserializer deserializer = new Deserializer.Builder()
      .schema(CUSTOMER_SCHEMA)
      .writerInspector((StructObjectInspector) IcebergObjectInspector.create(CUSTOMER_SCHEMA))
      .sourceInspector(schemaObjectInspector)
      .build();
  Record expected = GenericRecord.create(CUSTOMER_SCHEMA);
  expected.set(0, 1L);
  expected.set(1, "Bob");
  Record actual = deserializer.deserialize(new Object[] { new LongWritable(1L), new Text("Bob") });
  Assert.assertEquals(expected, actual);
}
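As a side note, here is a minimal, self-contained sketch of reading a row back through a struct inspector built with the same factory call. The class name StructInspectorSketch is illustrative (not from Hive), and the plain field names drop the "0:"/"1:" index prefixes the Iceberg test uses; a StandardStructObjectInspector accepts rows as either Object[] or List<Object>.

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

public class StructInspectorSketch {
  public static void main(String[] args) {
    // Build a two-column struct inspector, mirroring the test above
    StandardStructObjectInspector oi = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("col1", "col2"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.writableLongObjectInspector,
            PrimitiveObjectInspectorFactory.writableStringObjectInspector));
    // A standard struct inspector accepts rows as Object[] or List<Object>
    Object[] row = new Object[] { new LongWritable(1L), new Text("Bob") };
    for (StructField field : oi.getAllStructFieldRefs()) {
      System.out.println(field.getFieldName() + " = " + oi.getStructFieldData(row, field));
    }
  }
}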
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project Hive by Apache.
The class JdbcSerDe, method initialize.
/**
 * This method gets called multiple times by Hive. On some invocations, the properties will be empty.
 * We need to detect when the properties are not empty to initialize the class variables.
 */
@Override
public void initialize(Configuration configuration, Properties tableProperties, Properties partitionProperties)
    throws SerDeException {
  log.trace("Initializing the JdbcSerDe");
  super.initialize(configuration, tableProperties, partitionProperties);
  try {
    if (properties.containsKey(JdbcStorageConfig.DATABASE_TYPE.getPropertyName())) {
      Configuration tableConfig = JdbcStorageConfigManager.convertPropertiesToConfiguration(properties);
      DatabaseAccessor dbAccessor = DatabaseAccessorFactory.getAccessor(tableConfig);
      // Extract column names and types from properties
      List<TypeInfo> hiveColumnTypesList;
      if (properties.containsKey(Constants.JDBC_TABLE) && properties.containsKey(Constants.JDBC_QUERY)) {
        // The query was autogenerated by Hive, so the column names in the
        // pushed query match the list of hiveColumnNames
        String fieldNamesProperty =
            Preconditions.checkNotNull(properties.getProperty(Constants.JDBC_QUERY_FIELD_NAMES, null));
        String fieldTypesProperty =
            Preconditions.checkNotNull(properties.getProperty(Constants.JDBC_QUERY_FIELD_TYPES, null));
        hiveColumnNames = fieldNamesProperty.trim().split(",");
        hiveColumnTypesList = TypeInfoUtils.getTypeInfosFromTypeString(fieldTypesProperty);
      } else if (properties.containsKey(Constants.JDBC_QUERY)) {
        // The query was specified by the user; extract the column names
        hiveColumnNames = properties.getProperty(serdeConstants.LIST_COLUMNS).split(",");
        hiveColumnTypesList =
            TypeInfoUtils.getTypeInfosFromTypeString(properties.getProperty(serdeConstants.LIST_COLUMN_TYPES));
      } else {
        // A table is specified; get the column names from the accessor
        // because of capitalization differences
        hiveColumnNames = dbAccessor.getColumnNames(tableConfig).toArray(new String[0]);
        // The number of remote columns should match the declared column list
        if (hiveColumnNames.length != properties.getProperty(serdeConstants.LIST_COLUMNS).split(",").length) {
          throw new SerDeException("Column numbers do not match. Remote table columns are "
              + Arrays.toString(hiveColumnNames) + " and declared table columns in Hive external table are "
              + Arrays.toString(properties.getProperty(serdeConstants.LIST_COLUMNS).split(",")));
        }
        hiveColumnTypesList =
            TypeInfoUtils.getTypeInfosFromTypeString(properties.getProperty(serdeConstants.LIST_COLUMN_TYPES));
      }
      if (hiveColumnNames.length == 0) {
        throw new SerDeException("Received an empty Hive column name definition");
      }
      if (hiveColumnTypesList.size() == 0) {
        throw new SerDeException("Received an empty Hive column type definition");
      }
      numColumns = hiveColumnNames.length;
      dbRecordWritable = new DBRecordWritable(numColumns);
      // Populate the column types and the inspector
      hiveColumnTypes = new PrimitiveTypeInfo[hiveColumnTypesList.size()];
      List<ObjectInspector> fieldInspectors = new ArrayList<>(hiveColumnNames.length);
      for (int i = 0; i < hiveColumnNames.length; i++) {
        TypeInfo ti = hiveColumnTypesList.get(i);
        if (ti.getCategory() != Category.PRIMITIVE) {
          throw new SerDeException("Non primitive types not supported yet");
        }
        hiveColumnTypes[i] = (PrimitiveTypeInfo) ti;
        fieldInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(hiveColumnTypes[i]));
      }
      inspector =
          ObjectInspectorFactory.getStandardStructObjectInspector(Arrays.asList(hiveColumnNames), fieldInspectors);
      row = new ArrayList<>(hiveColumnNames.length);
    }
  } catch (Exception e) {
    throw new SerDeException("Caught exception while initializing the SqlSerDe", e);
  }
  if (log.isDebugEnabled()) {
    log.debug("JdbcSerDe initialized with\n" + "\t columns: " + Arrays.toString(hiveColumnNames)
        + "\n\t types: " + Arrays.toString(hiveColumnTypes));
  }
}
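The heart of the method above is the final step: turning Hive-style column names and types into a struct ObjectInspector. The following stand-alone sketch isolates just that recipe, under the same restriction to primitive columns; the class and method names (JdbcInspectorSketch, buildInspector) are illustrative, not Hive API.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class JdbcInspectorSketch {

  // Builds a Java-object struct inspector from comma-separated column names
  // and a Hive type string, the same recipe the serde follows above
  static StructObjectInspector buildInspector(String columnNames, String columnTypes) {
    List<String> names = Arrays.asList(columnNames.split(","));
    List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString(columnTypes);
    List<ObjectInspector> fieldOIs = new ArrayList<>(names.size());
    for (TypeInfo type : types) {
      // Mirror the serde's restriction: only primitive column types
      fieldOIs.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector((PrimitiveTypeInfo) type));
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(names, fieldOIs);
  }

  public static void main(String[] args) {
    StructObjectInspector oi = buildInspector("id,name", "bigint,string");
    System.out.println(oi.getTypeName()); // struct<id:bigint,name:string>
  }
}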
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project Hive by Apache.
The class TestLazyBinaryStruct, method testEmptyStructWithSerde.
@Test
public void testEmptyStructWithSerde() throws SerDeException {
  LazyBinaryStructObjectInspector oi =
      LazyBinaryObjectInspectorFactory.getLazyBinaryStructObjectInspector(new ArrayList<>(), new ArrayList<>());
  StandardStructObjectInspector standardOI =
      ObjectInspectorFactory.getStandardStructObjectInspector(new ArrayList<>(), new ArrayList<>());
  Properties schema = new Properties();
  schema.setProperty(serdeConstants.LIST_COLUMNS, "col0");
  schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, "struct<>");
  LazyBinarySerDe serde = new LazyBinarySerDe();
  serde.initialize(new Configuration(), schema, null);
  Writable writable = serde.serialize(standardOI.create(), standardOI);
  Object out = serde.deserialize(writable);
  assertNull(oi.getStructFieldsDataAsList(out));
}
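For reference, a tiny sketch of what the same factory call produces for the empty struct on its own (the class name EmptyStructSketch is illustrative):

import java.util.ArrayList;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;

public class EmptyStructSketch {
  public static void main(String[] args) {
    // An empty struct inspector is legal: no field names, no field inspectors
    StandardStructObjectInspector oi =
        ObjectInspectorFactory.getStandardStructObjectInspector(new ArrayList<>(), new ArrayList<>());
    System.out.println(oi.getTypeName());                  // struct<>
    System.out.println(oi.getAllStructFieldRefs().size()); // 0
    System.out.println(oi.create());                       // [] - an empty, reusable row
  }
}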
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project Hive by Apache.
The class TeradataBinarySerde, method initialize.
@Override
public void initialize(Configuration configuration, Properties tableProperties, Properties partitionProperties)
    throws SerDeException {
  super.initialize(configuration, tableProperties, partitionProperties);
  columnNames = Arrays.asList(properties.getProperty(serdeConstants.LIST_COLUMNS).split(","));
  String columnTypeProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  LOG.debug(serdeConstants.LIST_COLUMN_TYPES + ": " + columnTypeProperty);
  if (columnTypeProperty.length() == 0) {
    columnTypes = new ArrayList<TypeInfo>();
  } else {
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  }
  assert columnNames.size() == columnTypes.size();
  numCols = columnNames.size();
  // Get the configured Teradata timestamp precision; the binary files
  // generated by TPT/BTEQ can carry timestamps of different precisions
  timestampPrecision = Integer.parseInt(properties.getProperty(TD_TIMESTAMP_PRECISION, DEFAULT_TIMESTAMP_PRECISION));
  // Get the configured Teradata char charset; in Teradata, the Latin charset
  // uses 2 bytes per char and Unicode uses 3 bytes per char
  charCharset = properties.getProperty(TD_CHAR_SET, DEFAULT_CHAR_CHARSET);
  if (!CHARSET_TO_BYTE_NUM.containsKey(charCharset)) {
    throw new SerDeException(
        format("%s isn't supported in Teradata Char Charset %s", charCharset, CHARSET_TO_BYTE_NUM.keySet()));
  }
  // All columns have to be primitive.
  // Constructing the row ObjectInspector:
  List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(numCols);
  for (int i = 0; i < numCols; i++) {
    if (columnTypes.get(i).getCategory() != ObjectInspector.Category.PRIMITIVE) {
      throw new SerDeException(getClass().getName() + " only accepts primitive columns, but column[" + i
          + "] named " + columnNames.get(i) + " has category " + columnTypes.get(i).getCategory());
    }
    columnOIs.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(columnTypes.get(i)));
  }
  rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
  // Construct the row object; it will be reused for all rows
  row = new ArrayList<Object>(numCols);
  for (int i = 0; i < numCols; i++) {
    row.add(null);
  }
  // Initialize the variables related to the null array, which represents the null bitmap
  int byteNumForNullArray = (numCols / 8) + ((numCols % 8 == 0) ? 0 : 1);
  LOG.debug(format("The Null Bytes for each record will have %s bytes", byteNumForNullArray));
  inForNull = new byte[byteNumForNullArray];
  out = new TeradataBinaryDataOutputStream();
  serializeBytesWritable = new BytesWritable();
  outForNull = new byte[byteNumForNullArray];
}
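The null-array sizing near the end is a plain ceiling division: one bit per column, rounded up to whole bytes. A small illustrative helper (the name NullBitmapSketch/nullArrayBytes is not from Hive) makes the formula concrete:

public class NullBitmapSketch {

  // One bit per column, rounded up to whole bytes: ceil(numCols / 8.0)
  static int nullArrayBytes(int numCols) {
    return (numCols / 8) + ((numCols % 8 == 0) ? 0 : 1);
  }

  public static void main(String[] args) {
    System.out.println(nullArrayBytes(1));  // 1
    System.out.println(nullArrayBytes(8));  // 1
    System.out.println(nullArrayBytes(9));  // 2
    System.out.println(nullArrayBytes(16)); // 2
  }
}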
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project Hive by Apache.
The class MyTestPrimitiveClass, method getRowInspector.
public StructObjectInspector getRowInspector(PrimitiveTypeInfo[] primitiveTypeInfos) {
  List<String> columnNames = new ArrayList<String>(primitiveCount);
  List<ObjectInspector> primitiveObjectInspectorList = new ArrayList<ObjectInspector>(primitiveCount);
  for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
    columnNames.add(String.format("col%d", index));
    PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
    PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
    primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveCategory));
  }
  StandardStructObjectInspector rowOI =
      ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
  return rowOI;
}
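As in the other examples, the essential contract is a pair of equal-length lists: field names and field inspectors. A compact, runnable sketch of the same category-based pattern (the class name RowInspectorSketch is illustrative):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class RowInspectorSketch {
  public static void main(String[] args) {
    // Writable (not Java) inspectors keyed by primitive category, as in getRowInspector above
    List<PrimitiveCategory> categories =
        Arrays.asList(PrimitiveCategory.LONG, PrimitiveCategory.STRING, PrimitiveCategory.BOOLEAN);
    List<String> names = new ArrayList<>();
    List<ObjectInspector> ois = new ArrayList<>();
    for (int i = 0; i < categories.size(); i++) {
      names.add("col" + i);
      ois.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(categories.get(i)));
    }
    StandardStructObjectInspector rowOI =
        ObjectInspectorFactory.getStandardStructObjectInspector(names, ois);
    System.out.println(rowOI.getTypeName()); // struct<col0:bigint,col1:string,col2:boolean>
  }
}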