Example 91 with ObjectInspectorFactory.getStandardStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

The class TestDeserializer, method testSchemaDeserialize.

@Test
public void testSchemaDeserialize() {
    StandardStructObjectInspector schemaObjectInspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("0:col1", "1:col2"),
            Arrays.asList(
                PrimitiveObjectInspectorFactory.writableLongObjectInspector,
                PrimitiveObjectInspectorFactory.writableStringObjectInspector));
    Deserializer deserializer = new Deserializer.Builder()
        .schema(CUSTOMER_SCHEMA)
        .writerInspector((StructObjectInspector) IcebergObjectInspector.create(CUSTOMER_SCHEMA))
        .sourceInspector(schemaObjectInspector)
        .build();
    Record expected = GenericRecord.create(CUSTOMER_SCHEMA);
    expected.set(0, 1L);
    expected.set(1, "Bob");
    Record actual = deserializer.deserialize(new Object[] { new LongWritable(1L), new Text("Bob") });
    Assert.assertEquals(expected, actual);
}
Also used : StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) Record(org.apache.iceberg.data.Record) GenericRecord(org.apache.iceberg.data.GenericRecord) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) Test(org.junit.Test)
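
A minimal, self-contained sketch of the pattern all of these examples share: build a StandardStructObjectInspector from parallel lists of field names and inspectors, then create and read a struct through it. The field names and values below are illustrative, not taken from the test above.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class StructInspectorSketch {
    public static void main(String[] args) {
        // Field names and inspectors must line up by position.
        List<String> names = Arrays.asList("id", "name");
        List<ObjectInspector> inspectors = Arrays.asList(
                PrimitiveObjectInspectorFactory.javaLongObjectInspector,
                PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        StandardStructObjectInspector oi =
                ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);

        // create() returns a mutable struct; populate it field by field.
        Object struct = oi.create();
        oi.setStructFieldData(struct, oi.getStructFieldRef("id"), 1L);
        oi.setStructFieldData(struct, oi.getStructFieldRef("name"), "Bob");

        // Read values back through the inspector rather than by casting.
        System.out.println(oi.getStructFieldData(struct, oi.getStructFieldRef("id")));   // 1
        System.out.println(oi.getStructFieldData(struct, oi.getStructFieldRef("name"))); // Bob
    }
}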

Example 92 with ObjectInspectorFactory.getStandardStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

The class JdbcSerDe, method initialize.

/**
 * This method gets called multiple times by Hive. On some invocations, the properties will be empty.
 * We need to detect when the properties are not empty to initialize the class variables.
 */
@Override
public void initialize(Configuration configuration, Properties tableProperties, Properties partitionProperties) throws SerDeException {
    log.trace("Initializing the JdbcSerDe");
    super.initialize(configuration, tableProperties, partitionProperties);
    try {
        if (properties.containsKey(JdbcStorageConfig.DATABASE_TYPE.getPropertyName())) {
            Configuration tableConfig = JdbcStorageConfigManager.convertPropertiesToConfiguration(properties);
            DatabaseAccessor dbAccessor = DatabaseAccessorFactory.getAccessor(tableConfig);
            // Extract column names and types from properties
            List<TypeInfo> hiveColumnTypesList;
            if (properties.containsKey(Constants.JDBC_TABLE) && properties.containsKey(Constants.JDBC_QUERY)) {
                // The query was autogenerated by Hive, so the column names in the
                // pushed query match the list of hiveColumnNames
                String fieldNamesProperty = Preconditions.checkNotNull(properties.getProperty(Constants.JDBC_QUERY_FIELD_NAMES, null));
                String fieldTypesProperty = Preconditions.checkNotNull(properties.getProperty(Constants.JDBC_QUERY_FIELD_TYPES, null));
                hiveColumnNames = fieldNamesProperty.trim().split(",");
                hiveColumnTypesList = TypeInfoUtils.getTypeInfosFromTypeString(fieldTypesProperty);
            } else if (properties.containsKey(Constants.JDBC_QUERY)) {
                // The query was specified by the user; extract the column names
                hiveColumnNames = properties.getProperty(serdeConstants.LIST_COLUMNS).split(",");
                hiveColumnTypesList = TypeInfoUtils.getTypeInfosFromTypeString(properties.getProperty(serdeConstants.LIST_COLUMN_TYPES));
            } else {
                // A table is specified; get the column names from the accessor,
                // since capitalization may differ from the Hive declaration
                hiveColumnNames = dbAccessor.getColumnNames(tableConfig).toArray(new String[0]);
                // The number of remote columns must match the declared column list
                if (hiveColumnNames.length != properties.getProperty(serdeConstants.LIST_COLUMNS).split(",").length) {
                    throw new SerDeException("Column numbers do not match. " + "Remote table columns are " + Arrays.toString(hiveColumnNames) + " and declared table columns in Hive " + "external table are " + Arrays.toString(properties.getProperty(serdeConstants.LIST_COLUMNS).split(",")));
                }
                hiveColumnTypesList = TypeInfoUtils.getTypeInfosFromTypeString(properties.getProperty(serdeConstants.LIST_COLUMN_TYPES));
            }
            if (hiveColumnNames.length == 0) {
                throw new SerDeException("Received an empty Hive column name definition");
            }
            if (hiveColumnTypesList.size() == 0) {
                throw new SerDeException("Received an empty Hive column type definition");
            }
            numColumns = hiveColumnNames.length;
            dbRecordWritable = new DBRecordWritable(numColumns);
            // Populate column types and inspector
            hiveColumnTypes = new PrimitiveTypeInfo[hiveColumnTypesList.size()];
            List<ObjectInspector> fieldInspectors = new ArrayList<>(hiveColumnNames.length);
            for (int i = 0; i < hiveColumnNames.length; i++) {
                TypeInfo ti = hiveColumnTypesList.get(i);
                if (ti.getCategory() != Category.PRIMITIVE) {
                    throw new SerDeException("Non primitive types not supported yet");
                }
                hiveColumnTypes[i] = (PrimitiveTypeInfo) ti;
                fieldInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(hiveColumnTypes[i]));
            }
            inspector = ObjectInspectorFactory.getStandardStructObjectInspector(Arrays.asList(hiveColumnNames), fieldInspectors);
            row = new ArrayList<>(hiveColumnNames.length);
        }
    } catch (Exception e) {
        throw new SerDeException("Caught exception while initializing the SqlSerDe", e);
    }
    if (log.isDebugEnabled()) {
        log.debug("JdbcSerDe initialized with\n" + "\t columns: " + Arrays.toString(hiveColumnNames) + "\n\t types: " + Arrays.toString(hiveColumnTypes));
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) DatabaseAccessor(org.apache.hive.storage.jdbc.dao.DatabaseAccessor)
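
The three branches above differ only in how the column names and types are obtained; the inspector construction at the end is identical in each case. Here is a condensed sketch of that final step, with hypothetical column metadata standing in for the table properties:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class JdbcInspectorSketch {
    public static void main(String[] args) {
        // Hypothetical column metadata, standing in for the table properties.
        String[] columnNames = "id,name,amount".split(",");
        List<TypeInfo> columnTypes =
                TypeInfoUtils.getTypeInfosFromTypeString("bigint,string,double");

        List<ObjectInspector> fieldInspectors = new ArrayList<>(columnNames.length);
        for (TypeInfo ti : columnTypes) {
            // The SerDe above rejects non-primitive types, so we assume primitives here.
            fieldInspectors.add(PrimitiveObjectInspectorFactory
                    .getPrimitiveJavaObjectInspector((PrimitiveTypeInfo) ti));
        }
        StructObjectInspector inspector = ObjectInspectorFactory
                .getStandardStructObjectInspector(Arrays.asList(columnNames), fieldInspectors);
        System.out.println(inspector.getTypeName()); // e.g. struct<id:bigint,name:string,amount:double>
    }
}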

Example 93 with ObjectInspectorFactory.getStandardStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

The class TestLazyBinaryStruct, method testEmptyStructWithSerde.

@Test
public void testEmptyStructWithSerde() throws SerDeException {
    LazyBinaryStructObjectInspector oi = LazyBinaryObjectInspectorFactory.getLazyBinaryStructObjectInspector(new ArrayList<>(), new ArrayList<>());
    StandardStructObjectInspector standardOI = ObjectInspectorFactory.getStandardStructObjectInspector(new ArrayList<>(), new ArrayList<>());
    Properties schema = new Properties();
    schema.setProperty(serdeConstants.LIST_COLUMNS, "col0");
    schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, "struct<>");
    LazyBinarySerDe serde = new LazyBinarySerDe();
    serde.initialize(new Configuration(), schema, null);
    Writable writable = serde.serialize(standardOI.create(), standardOI);
    Object out = serde.deserialize(writable);
    assertNull(oi.getStructFieldsDataAsList(out));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Writable(org.apache.hadoop.io.Writable) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) Properties(java.util.Properties) LazyBinaryStructObjectInspector(org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector) Test(org.junit.Test)
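
For contrast with the empty-struct case, here is a sketch of the same round trip with one real column, assuming a hypothetical "col0" string field; the deserialized row is read back through the serde's own inspector, which may yield lazy writable values rather than Java strings.

import java.util.Arrays;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Writable;

public class LazyBinaryRoundTripSketch {
    public static void main(String[] args) throws Exception {
        // One string column this time, so the round trip carries data.
        List<ObjectInspector> fieldOIs =
                Arrays.asList(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        StandardStructObjectInspector oi = ObjectInspectorFactory
                .getStandardStructObjectInspector(Arrays.asList("col0"), fieldOIs);

        Properties schema = new Properties();
        schema.setProperty(serdeConstants.LIST_COLUMNS, "col0");
        schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string");
        LazyBinarySerDe serde = new LazyBinarySerDe();
        serde.initialize(new Configuration(), schema, null);

        // Build a row through the standard inspector and serialize it.
        Object row = oi.create();
        oi.setStructFieldData(row, oi.getStructFieldRef("col0"), "hello");
        Writable writable = serde.serialize(row, oi);

        // Deserialize and read the field back through the serde's inspector.
        Object out = serde.deserialize(writable);
        StructObjectInspector outOI = (StructObjectInspector) serde.getObjectInspector();
        System.out.println(outOI.getStructFieldsDataAsList(out)); // [hello] (as a lazy Text)
    }
}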

Example 94 with ObjectInspectorFactory.getStandardStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

The class TeradataBinarySerde, method initialize.

@Override
public void initialize(Configuration configuration, Properties tableProperties, Properties partitionProperties) throws SerDeException {
    super.initialize(configuration, tableProperties, partitionProperties);
    columnNames = Arrays.asList(properties.getProperty(serdeConstants.LIST_COLUMNS).split(","));
    String columnTypeProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    LOG.debug(serdeConstants.LIST_COLUMN_TYPES + ": " + columnTypeProperty);
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    assert columnNames.size() == columnTypes.size();
    numCols = columnNames.size();
    // Get the configured Teradata timestamp precision;
    // TPT/BTEQ can be configured to generate binary files with timestamps of different precisions
    timestampPrecision = Integer.parseInt(properties.getProperty(TD_TIMESTAMP_PRECISION, DEFAULT_TIMESTAMP_PRECISION));
    // Get the configured Teradata char charset;
    // in Teradata, the latin charset uses 2 bytes per char and unicode uses 3 bytes per char
    charCharset = properties.getProperty(TD_CHAR_SET, DEFAULT_CHAR_CHARSET);
    if (!CHARSET_TO_BYTE_NUM.containsKey(charCharset)) {
        throw new SerDeException(format("%s isn't supported in Teradata Char Charset %s", charCharset, CHARSET_TO_BYTE_NUM.keySet()));
    }
    // All columns have to be primitive.
    // Constructing the row ObjectInspector:
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(numCols);
    for (int i = 0; i < numCols; i++) {
        if (columnTypes.get(i).getCategory() != ObjectInspector.Category.PRIMITIVE) {
            throw new SerDeException(getClass().getName() + " only accepts primitive columns, but column[" + i + "] named " + columnNames.get(i) + " has category " + columnTypes.get(i).getCategory());
        }
        columnOIs.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(columnTypes.get(i)));
    }
    rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
    // Construct the row object; it will be reused for all rows
    row = new ArrayList<Object>(numCols);
    for (int i = 0; i < numCols; i++) {
        row.add(null);
    }
    // Initialize the vars for the null array, which represents the null bitmap
    int byteNumForNullArray = (numCols / 8) + ((numCols % 8 == 0) ? 0 : 1);
    LOG.debug(format("The Null Bytes for each record will have %s bytes", byteNumForNullArray));
    inForNull = new byte[byteNumForNullArray];
    out = new TeradataBinaryDataOutputStream();
    serializeBytesWritable = new BytesWritable();
    outForNull = new byte[byteNumForNullArray];
}
Also used : HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
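
The null-array sizing above allocates one bit per column and rounds up to whole bytes. A quick standalone check of the formula:

public class NullBitmapSketch {
    // Same formula as in the SerDe: one null bit per column, rounded up to bytes.
    static int byteNumForNullArray(int numCols) {
        return (numCols / 8) + ((numCols % 8 == 0) ? 0 : 1);
    }

    public static void main(String[] args) {
        System.out.println(byteNumForNullArray(1)); // 1: columns 1..8 fit in one byte
        System.out.println(byteNumForNullArray(8)); // 1: still exactly one byte
        System.out.println(byteNumForNullArray(9)); // 2: a ninth column needs a second byte
    }
}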

Example 95 with ObjectInspectorFactory.getStandardStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

The class MyTestPrimitiveClass, method getRowInspector.

public StructObjectInspector getRowInspector(PrimitiveTypeInfo[] primitiveTypeInfos) {
    List<String> columnNames = new ArrayList<String>(primitiveCount);
    List<ObjectInspector> primitiveObjectInspectorList = new ArrayList<ObjectInspector>(primitiveCount);
    for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
        columnNames.add(String.format("col%d", index));
        PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
        PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
        primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveCategory));
    }
    StandardStructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
    return rowOI;
}
Also used : WritableHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector) WritableHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) WritableHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector) ArrayList(java.util.ArrayList) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
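
A usage sketch for the kind of inspector this method returns: the same construction with two hypothetical primitive categories, then walking the schema through getAllStructFieldRefs. The column names and categories below are illustrative.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class RowInspectorSketch {
    public static void main(String[] args) {
        // Two writable-backed primitive columns, mirroring getRowInspector above.
        List<String> names = Arrays.asList("col0", "col1");
        List<ObjectInspector> ois = Arrays.asList(
                PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.LONG),
                PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.STRING));
        StructObjectInspector rowOI =
                ObjectInspectorFactory.getStandardStructObjectInspector(names, ois);

        // The struct inspector exposes its schema through its field refs.
        for (StructField f : rowOI.getAllStructFieldRefs()) {
            System.out.println(f.getFieldName() + " : " + f.getFieldObjectInspector().getTypeName());
        }
    }
}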

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 92
ArrayList (java.util.ArrayList) 83
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 68
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) 27
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 24
StandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) 21
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) 20
Test (org.junit.Test) 16
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField) 14
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) 14
Text (org.apache.hadoop.io.Text) 14
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) 13
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) 13
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) 13
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) 13
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 11
SerDeException (org.apache.hadoop.hive.serde2.SerDeException) 10
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) 10
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) 10
VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) 10