
Example 96 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class AvroSerializer method serialize.

// Hive is pretty simple (read: stupid) in writing out values via the serializer.
// We're just going to go through, matching indices.  Hive formats normally
// handle mismatches with null.  We don't have that option, so instead we'll
// end up throwing an exception for invalid records.
public Writable serialize(Object o, ObjectInspector objectInspector, List<String> columnNames, List<TypeInfo> columnTypes, Schema schema) throws AvroSerdeException {
    StructObjectInspector soi = (StructObjectInspector) objectInspector;
    GenericData.Record record = new GenericData.Record(schema);
    List<? extends StructField> outputFieldRefs = soi.getAllStructFieldRefs();
    if (outputFieldRefs.size() != columnNames.size()) {
        throw new AvroSerdeException("Number of input columns was different than output columns (in = " + columnNames.size() + " vs out = " + outputFieldRefs.size());
    }
    int size = schema.getFields().size();
    if (outputFieldRefs.size() != size) {
        throw new AvroSerdeException("Hive passed in a different number of fields than the schema expected: (Hive wanted " + outputFieldRefs.size() + ", Avro expected " + schema.getFields().size());
    }
    List<? extends StructField> allStructFieldRefs = soi.getAllStructFieldRefs();
    List<Object> structFieldsDataAsList = soi.getStructFieldsDataAsList(o);
    for (int i = 0; i < size; i++) {
        Field field = schema.getFields().get(i);
        TypeInfo typeInfo = columnTypes.get(i);
        StructField structFieldRef = allStructFieldRefs.get(i);
        Object structFieldData = structFieldsDataAsList.get(i);
        ObjectInspector fieldOI = structFieldRef.getFieldObjectInspector();
        Object val = serialize(typeInfo, fieldOI, structFieldData, field.schema());
        record.put(field.name(), val);
    }
    if (!GenericData.get().validate(schema, record)) {
        throw new SerializeToAvroException(schema, record);
    }
    cache.setRecord(record);
    return cache;
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) GenericData(org.apache.avro.generic.GenericData) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) Field(org.apache.avro.Schema.Field) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField)
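
The serializer above fills the Avro record positionally and stores each value under the Avro field name, then validates the record against the schema before returning it. The following is a minimal standalone sketch of that record-building and validation pattern, using only the public Avro API; the two-field schema and the sample values are assumptions for illustration, not taken from the Hive code.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;

public class AvroRecordSketch {
    public static void main(String[] args) {
        // A hypothetical two-field schema standing in for the table schema Hive passes in.
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"row\",\"fields\":["
            + "{\"name\":\"id\",\"type\":\"long\"},"
            + "{\"name\":\"name\",\"type\":\"string\"}]}");

        GenericData.Record record = new GenericData.Record(schema);
        // Like the serializer, walk the schema fields by index and store each value by field name.
        record.put(schema.getFields().get(0).name(), 42L);
        record.put(schema.getFields().get(1).name(), "example");

        // The serializer throws SerializeToAvroException when this validation fails.
        if (!GenericData.get().validate(schema, record)) {
            throw new IllegalStateException("record does not conform to schema");
        }
    }
}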

Example 97 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class WritableHiveVarcharObjectInspector method getPrimitiveWritableObject.

@Override
public HiveVarcharWritable getPrimitiveWritableObject(Object o) {
    // Check the input object's length; if it doesn't match the type parameters,
    // then output a new writable with the correct params.
    if (o == null) {
        return null;
    }
    if (o instanceof Text) {
        String str = ((Text) o).toString();
        HiveVarcharWritable hcw = new HiveVarcharWritable();
        hcw.set(str, ((VarcharTypeInfo) typeInfo).getLength());
        return hcw;
    }
    HiveVarcharWritable writable = ((HiveVarcharWritable) o);
    if (doesWritableMatchTypeParams((HiveVarcharWritable) o)) {
        return writable;
    }
    return getWritableWithParams(writable);
}
Also used : HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) Text(org.apache.hadoop.io.Text)
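
The inspector's job here is to hand back a HiveVarcharWritable whose length respects the declared varchar(n) parameter. Below is a minimal sketch of that length-enforcing conversion using HiveVarcharWritable.set(value, maxLength); the varchar length of 5 and the input string are assumptions for illustration.

import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;

public class VarcharWritableSketch {
    public static void main(String[] args) {
        HiveVarcharWritable hcw = new HiveVarcharWritable();
        // set(value, maxLength) enforces the varchar length parameter,
        // as getPrimitiveWritableObject does for Text input.
        hcw.set("hello world", 5);
        // Expected to print the value truncated to the declared length ("hello").
        System.out.println(hcw.getHiveVarchar().getValue());
    }
}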

Example 98 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class LazyBinarySerializeWrite method writeTimestamp.

/*
   * TIMESTAMP.
   */
@Override
public void writeTimestamp(Timestamp v) throws IOException {
    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
        if (fieldIndex > 0) {
            // Write back previous 8 field's NULL byte.
            output.writeByte(nullOffset, nullByte);
            nullByte = 0;
            nullOffset = output.getLength();
        }
        // Allocate next NULL byte.
        output.reserve(1);
    }
    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);
    if (timestampWritable == null) {
        timestampWritable = new TimestampWritable();
    }
    timestampWritable.set(v);
    timestampWritable.writeToByteStream(output);
    fieldIndex++;
    if (fieldIndex == fieldCount) {
        // Write back the final NULL byte before the last fields.
        output.writeByte(nullOffset, nullByte);
    }
}
Also used : TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable)
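
The bookkeeping in writeTimestamp is the LazyBinary NULL-byte scheme: one byte is reserved for every group of 8 fields, and bit i of that byte is set when field (8*k + i) is not null; the byte is written back once the group is complete (or at the end of the row). The standalone sketch below models only that bitmask arithmetic, not the Hive Output stream; the sample field values are hypothetical.

public class NullByteSketch {
    public static void main(String[] args) {
        Object[] fields = { 1, null, "x", null, null, 5, 6, null, "ninth" };
        int nullByte = 0;
        for (int fieldIndex = 0; fieldIndex < fields.length; fieldIndex++) {
            if (fieldIndex % 8 == 0 && fieldIndex > 0) {
                // A full group of 8 fields is done: "write back" its NULL byte and start a new one.
                System.out.printf("null byte for fields %d-%d: 0x%02x%n",
                        fieldIndex - 8, fieldIndex - 1, nullByte);
                nullByte = 0;
            }
            if (fields[fieldIndex] != null) {
                // Set the bit for a field that is NOT NULL, as in writeTimestamp.
                nullByte |= 1 << (fieldIndex % 8);
            }
        }
        // Final (partial) group, written back at the end of the row.
        System.out.printf("final null byte: 0x%02x%n", nullByte);
    }
}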

Example 99 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class TestGenericUDFLastDay method runAndVerifyTs.

private void runAndVerifyTs(String str, String expResult, GenericUDF udf) throws HiveException {
    DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new TimestampWritable(Timestamp.valueOf(str)) : null);
    DeferredObject[] args = { valueObj0 };
    Text output = (Text) udf.evaluate(args);
    assertEquals("last_day() test ", expResult, output != null ? output.toString() : null);
}
Also used : DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) Text(org.apache.hadoop.io.Text)
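
This helper wraps a timestamp string in a TimestampWritable, evaluates the UDF, and compares the Text result against the expected string. A minimal sketch of how a test would drive it is shown below; the test method name, the use of PrimitiveObjectInspectorFactory.writableTimestampObjectInspector to initialize the UDF, and the sample dates (last_day of January 2014 is 2014-01-31) are assumptions for illustration.

public void testLastDayTs() throws HiveException {
    GenericUDFLastDay udf = new GenericUDFLastDay();
    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
    // Initialize the UDF with a single timestamp argument before evaluating it.
    udf.initialize(new ObjectInspector[] { valueOI0 });
    // last_day() should return the last day of the input month.
    runAndVerifyTs("2014-01-14 10:30:45", "2014-01-31", udf);
    // A null input should produce a null output.
    runAndVerifyTs(null, null, udf);
}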

Example 100 with Output

use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

the class TestGenericUDFQuarter method runAndVerifyTs.

private void runAndVerifyTs(String str, Integer expResult, GenericUDF udf) throws HiveException {
    DeferredObject valueObj0 = new DeferredJavaObject(str != null ? new TimestampWritable(Timestamp.valueOf(str)) : null);
    DeferredObject[] args = { valueObj0 };
    IntWritable output = (IntWritable) udf.evaluate(args);
    if (expResult == null) {
        assertNull(output);
    } else {
        assertNotNull(output);
        assertEquals("quarter() test ", expResult.intValue(), output.get());
    }
}
Also used : DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) IntWritable(org.apache.hadoop.io.IntWritable)
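
The quarter() variant of the helper returns an IntWritable, so it distinguishes the null case (assertNull) from a concrete expected quarter. A short usage sketch follows; as above, the test method name, the ObjectInspector setup, and the sample timestamp (May falls in quarter 2) are illustrative assumptions.

public void testQuarterTs() throws HiveException {
    GenericUDFQuarter udf = new GenericUDFQuarter();
    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
    udf.initialize(new ObjectInspector[] { valueOI0 });
    // May is in the second quarter.
    runAndVerifyTs("2014-05-05 10:30:45", 2, udf);
    // A null input should produce a null output.
    runAndVerifyTs(null, null, udf);
}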

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 66
ArrayList (java.util.ArrayList) 46
DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) 37
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) 37
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 36
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 33
Text (org.apache.hadoop.io.Text) 27
DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable) 26
TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable) 20
IOException (java.io.IOException) 19
Output (org.apache.hadoop.hive.serde2.ByteStream.Output) 19
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 19
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) 18
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 17
BytesWritable (org.apache.hadoop.io.BytesWritable) 17
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) 16
Timestamp (java.sql.Timestamp) 14
SerDeException (org.apache.hadoop.hive.serde2.SerDeException) 14
IntWritable (org.apache.hadoop.io.IntWritable) 13
Date (java.sql.Date) 11