Search in sources :

Example 86 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

The initialize method of the class RegexSerDe.

/**
 * Initializes this SerDe.
 *
 * <p>After delegating to the superclass, this reads the regex settings from
 * {@code properties}, compiles the input pattern (if one was given), checks
 * that every column type equals {@code TypeInfoFactory.stringTypeInfo}, and
 * creates the row object inspector plus the reusable row holders.
 *
 * @param configuration forwarded to the superclass initializer
 * @param tableProperties forwarded to the superclass initializer
 * @param partitionProperties forwarded to the superclass initializer
 * @throws SerDeException if a column has a type other than string
 */
@Override
public void initialize(Configuration configuration, Properties tableProperties, Properties partitionProperties) throws SerDeException {
    super.initialize(configuration, tableProperties, partitionProperties);
    numColumns = this.getColumnNames().size();

    // Settings are taken from the `properties` field, not from the method
    // arguments (presumably populated by super.initialize -- confirm).
    inputRegex = properties.getProperty(INPUT_REGEX);
    outputFormatString = properties.getProperty(OUTPUT_FORMAT_STRING);
    // NOTE(review): the constant is named ..._CASE_SENSITIVE, yet a value of
    // "true" switches case-insensitive matching ON below.
    boolean inputRegexIgnoreCase = "true".equalsIgnoreCase(properties.getProperty(INPUT_REGEX_CASE_SENSITIVE));

    // Without an input regex there is nothing to compile.
    if (inputRegex == null) {
        inputPattern = null;
    } else {
        int patternFlags = Pattern.DOTALL;
        if (inputRegexIgnoreCase) {
            patternFlags |= Pattern.CASE_INSENSITIVE;
        }
        inputPattern = Pattern.compile(inputRegex, patternFlags);
    }

    // Every column must be of type STRING; report the first offender.
    int columnIndex = 0;
    for (TypeInfo columnType : getColumnTypes()) {
        boolean isStringColumn = columnType.equals(TypeInfoFactory.stringTypeInfo);
        if (!isStringColumn) {
            String message = getClass().getName() + " only accepts string columns, but column[" + columnIndex + "] named " + getColumnNames().get(columnIndex) + " has type " + columnType;
            throw new SerDeException(message);
        }
        columnIndex++;
    }

    // Row ObjectInspector: one java String inspector per column, wrapped in
    // a standard struct inspector keyed by the column names.
    List<ObjectInspector> stringInspectors = Collections.nCopies(numColumns, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(getColumnNames(), stringInspectors);

    // Holders that are allocated once here and reused for all rows.
    row = new ArrayList<>(Collections.nCopies(numColumns, null));
    outputFields = new Object[numColumns];
    outputRowText = new Text();
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Text(org.apache.hadoop.io.Text) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 87 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

The initialize method of the class TypedBytesSerDe.

/**
 * Initializes this SerDe.
 *
 * <p>After delegating to the superclass, this allocates the typed-bytes
 * input/output buffers, checks that every column has category
 * {@code Category.PRIMITIVE}, and builds the row object inspector from the
 * standard writable inspector of each column type.
 *
 * @param configuration forwarded to the superclass initializer
 * @param tableProperties forwarded to the superclass initializer
 * @param partitionProperties forwarded to the superclass initializer
 * @throws SerDeException if a column is not of a primitive category
 */
@Override
public void initialize(Configuration configuration, Properties tableProperties, Properties partitionProperties) throws SerDeException {
    super.initialize(configuration, tableProperties, partitionProperties);

    // Buffers and typed-bytes wrappers, created once during initialization.
    serializeBytesWritable = new BytesWritable();
    barrStr = new NonSyncDataOutputBuffer();
    tbOut = new TypedBytesWritableOutput(barrStr);
    inBarrStr = new NonSyncDataInputBuffer();
    tbIn = new TypedBytesWritableInput(inBarrStr);

    numColumns = getColumnNames().size();

    // Only primitive columns are supported; fail on the first one that is not.
    int columnIndex = 0;
    while (columnIndex < numColumns) {
        TypeInfo columnType = getColumnTypes().get(columnIndex);
        if (columnType.getCategory() != Category.PRIMITIVE) {
            String message = getClass().getName() + " only accepts primitive columns, but column[" + columnIndex + "] named " + getColumnNames().get(columnIndex) + " has category " + columnType.getCategory();
            throw new SerDeException(message);
        }
        columnIndex++;
    }

    // Row ObjectInspector: each column gets the standard writable inspector
    // that corresponds to its declared type.
    List<ObjectInspector> writableInspectors = new ArrayList<>(numColumns);
    for (TypeInfo columnType : getColumnTypes()) {
        ObjectInspector columnInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(columnType);
        writableInspectors.add(columnInspector);
    }
    rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(getColumnNames(), writableInspectors);

    // Row holder that is allocated once here and reused for all rows.
    row = new ArrayList<>(Collections.nCopies(numColumns, null));
}
Also used : TypedBytesWritableInput(org.apache.hadoop.hive.contrib.util.typedbytes.TypedBytesWritableInput) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) NonSyncDataOutputBuffer(org.apache.hadoop.hive.ql.io.NonSyncDataOutputBuffer) TypedBytesWritableOutput(org.apache.hadoop.hive.contrib.util.typedbytes.TypedBytesWritableOutput) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) NonSyncDataInputBuffer(org.apache.hadoop.hive.ql.io.NonSyncDataInputBuffer) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 88 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

The serializeObject method of the class TestDruidSerDe.

/**
 * Serializes {@code rowObject} with {@code serDe} and asserts that the
 * resulting {@link DruidWritable} holds the same entries as the expected one.
 *
 * <p>The object inspector is built from the column names/types found in
 * {@code properties}, extended by the Druid timestamp-granularity column
 * (typed as {@code timestamp}).
 *
 * @param properties source of the column names and column types
 * @param serDe the SerDe under test
 * @param rowObject the row to serialize
 * @param druidWritable the expected serialization result
 * @throws SerDeException if serialization fails
 */
private static void serializeObject(Properties properties, DruidSerDe serDe, Object[] rowObject, DruidWritable druidWritable) throws SerDeException {
    // Build OI with timestamp granularity column
    final List<String> columnNames = new ArrayList<>(Utilities.getColumnNames(properties));
    columnNames.add(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME);
    // Collect into an explicit ArrayList: the list is appended to right below,
    // and Collectors.toList() makes no guarantee that its result is mutable.
    final List<PrimitiveTypeInfo> columnTypes = Utilities.getColumnTypes(properties).stream().map(TypeInfoFactory::getPrimitiveTypeInfo).collect(Collectors.toCollection(ArrayList::new));
    columnTypes.add(TypeInfoFactory.getPrimitiveTypeInfo("timestamp"));
    // This list is only read, so the plain toList() collector is sufficient.
    List<ObjectInspector> inspectors = columnTypes.stream().map(PrimitiveObjectInspectorFactory::getPrimitiveWritableObjectInspector).collect(Collectors.toList());
    ObjectInspector inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
    // Serialize
    DruidWritable writable = (DruidWritable) serDe.serialize(rowObject, inspector);
    // Check result: same number of entries, and every expected entry present
    // with an equal value.
    assertEquals(druidWritable.getValue().size(), writable.getValue().size());
    for (Entry<String, Object> e : druidWritable.getValue().entrySet()) {
        assertEquals(e.getValue(), writable.getValue().get(e.getKey()));
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ArrayList(java.util.ArrayList) ArgumentMatchers.anyObject(org.mockito.ArgumentMatchers.anyObject) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 89 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

The createStructObjectInspector method of the class InternalUtil.

/**
 * Builds a standard struct object inspector for the given schema, with one
 * struct field per schema field.
 *
 * @param outputSchema the schema to translate; must not be {@code null}
 * @return a struct inspector whose field names and field inspectors follow
 *     the order of {@code outputSchema.getFields()}
 * @throws IOException if {@code outputSchema} is {@code null}
 */
static StructObjectInspector createStructObjectInspector(HCatSchema outputSchema) throws IOException {
    if (outputSchema == null) {
        throw new IOException("Invalid output schema specified");
    }

    List<String> names = new ArrayList<String>();
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();
    for (HCatFieldSchema field : outputSchema.getFields()) {
        // The field's type string is parsed into a TypeInfo, which in turn
        // selects the inspector for that field.
        TypeInfo fieldType = TypeInfoUtils.getTypeInfoFromTypeString(field.getTypeString());
        names.add(field.getName());
        inspectors.add(getObjectInspector(fieldType));
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ArrayList(java.util.ArrayList) IOException(java.io.IOException) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 90 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

The testListDeserialize method of the class TestDeserializer.

/**
 * Deserializes a row holding a single list-of-long column and checks that
 * the resulting record equals one whose first field is the list {@code [1L]}.
 */
@Test
public void testListDeserialize() {
    Schema schema = new Schema(optional(1, "list_type", Types.ListType.ofOptional(2, Types.LongType.get())));

    // Source side: a struct with one "list_type" field, a list of writable longs.
    StructObjectInspector inspector = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("list_type"),
        Arrays.asList(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableLongObjectInspector)));

    // Writer side: the inspector derived from the schema itself.
    StructObjectInspector writerInspector = (StructObjectInspector) IcebergObjectInspector.create(schema);

    Deserializer deserializer = new Deserializer.Builder()
        .schema(schema)
        .writerInspector(writerInspector)
        .sourceInspector(inspector)
        .build();

    // Input row: one column whose value is a one-element array of LongWritable.
    Object[] listValue = new Object[] { new LongWritable(1L) };
    Object[] data = new Object[] { listValue };

    Record expected = GenericRecord.create(schema);
    expected.set(0, Collections.singletonList(1L));

    Record actual = deserializer.deserialize(data);
    Assert.assertEquals(expected, actual);
}
Also used : Schema(org.apache.iceberg.Schema) Record(org.apache.iceberg.data.Record) GenericRecord(org.apache.iceberg.data.GenericRecord) LongWritable(org.apache.hadoop.io.LongWritable) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 92; ArrayList (java.util.ArrayList): 83; StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 68; PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 27; TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 24; StandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector): 21; PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 20; Test (org.junit.Test): 16; StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 14; StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 14; Text (org.apache.hadoop.io.Text): 14; ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 13; DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 13; ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo): 13; MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo): 13; HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 11; SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 10; MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 10; CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo): 10; VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo): 10