Example 16 with AbstractSerDe

Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

From the class TestLazyBinarySerDe, method testShorterSchemaDeserialization1.

/**
 * Tests shorter-schema deserialization, where a bigger struct is serialized
 * and then deserialized with a smaller struct. Here the serialized struct
 * has 9 fields and is deserialized into a struct of 8 fields.
 */
private void testShorterSchemaDeserialization1(Random r) throws Throwable {
    StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
    AbstractSerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    // The returned inspector for serde1 is unused; only serde2's inspector is needed below.
    serde1.getObjectInspector();
    StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassSmaller.class, ObjectInspectorOptions.JAVA);
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
    AbstractSerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();
    int num = 100;
    for (int itest = 0; itest < num; itest++) {
        MyTestClass t = new MyTestClass();
        ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
        t.randomFill(r, extraTypeInfo);
        // Serialize with the 9-field schema, then deserialize with the 8-field schema.
        BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1);
        Object output = serde2.deserialize(bw);
        if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) {
            System.out.println("structs      = " + SerDeUtils.getJSONString(t, rowOI1));
            System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
            System.out.println("serialized   = " + TestBinarySortableSerDe.hexString(bw));
            assertEquals(t, output);
        }
    }
}
Also used : AbstractPrimitiveLazyObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector) LazyBinaryMapObjectInspector(org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector) WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) JavaBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) MyTestClass(org.apache.hadoop.hive.serde2.binarysortable.MyTestClass) ExtraTypeInfo(org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo) BytesWritable(org.apache.hadoop.io.BytesWritable) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe)
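
The getSerDe helper used above is not part of this snippet. Below is a minimal sketch of what such a helper could look like, assuming a LazyBinarySerDe initialized through the standard "columns" and "columns.types" properties; treat the name and exact wiring as assumptions rather than the verbatim test code.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;

// Hypothetical helper: builds a LazyBinarySerDe from comma-separated field
// name and type lists, the same shape Hive passes in table properties.
private static AbstractSerDe getSerDe(String fieldNames, String fieldTypes) throws SerDeException {
    Properties schema = new Properties();
    schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames);
    schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes);
    AbstractSerDe serde = new LazyBinarySerDe();
    serde.initialize(new Configuration(), schema);
    return serde;
}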

Example 17 with AbstractSerDe

Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

From the class LazySimpleSerDe, method doSerialize.

/**
 * Serialize a row of data.
 *
 * @param obj
 *          The row object
 * @param objInspector
 *          The ObjectInspector for the row object
 * @return The serialized Writable object
 * @throws SerDeException
 * @see org.apache.hadoop.hive.serde2.AbstractSerDe#serialize(Object, ObjectInspector)
 */
@Override
public Writable doSerialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    if (objInspector.getCategory() != Category.STRUCT) {
        throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName());
    }
    // Prepare the field ObjectInspectors
    StructObjectInspector soi = (StructObjectInspector) objInspector;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    List<Object> list = soi.getStructFieldsDataAsList(obj);
    List<? extends StructField> declaredFields =
        (serdeParams.getRowTypeInfo() != null
            && ((StructTypeInfo) serdeParams.getRowTypeInfo()).getAllStructFieldNames().size() > 0)
        ? ((StructObjectInspector) getObjectInspector()).getAllStructFieldRefs()
        : null;
    serializeStream.reset();
    serializedSize = 0;
    // Serialize each field
    for (int i = 0; i < fields.size(); i++) {
        // Append the separator if needed.
        if (i > 0) {
            serializeStream.write(serdeParams.getSeparators()[0]);
        }
        // Get the field objectInspector and the field object.
        ObjectInspector foi = fields.get(i).getFieldObjectInspector();
        Object f = (list == null ? null : list.get(i));
        if (declaredFields != null && i >= declaredFields.size()) {
            throw new SerDeException("Error: expecting " + declaredFields.size() + " but asking for field " + i + "\n" + "data=" + obj + "\n" + "tableType=" + serdeParams.getRowTypeInfo().toString() + "\n" + "dataType=" + TypeInfoUtils.getTypeInfoFromObjectInspector(objInspector));
        }
        serializeField(serializeStream, f, foi, serdeParams);
    }
    // TODO: The copy of data is unnecessary, but there is no work-around
    // since we cannot directly set the private byte[] field inside Text.
    serializeCache.set(serializeStream.getData(), 0, serializeStream.getLength());
    serializedSize = serializeStream.getLength();
    lastOperationSerialize = true;
    lastOperationDeserialize = false;
    return serializeCache;
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
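
To make the contract of doSerialize concrete, here is a short, hedged usage sketch: a LazySimpleSerDe initialized with an illustrative two-column schema and a comma delimiter, serializing one row through a standard struct inspector. All schema values here are assumptions for the example, not anything from the file above.

import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

public class LazySimpleSerDeSketch {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
        props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
        props.setProperty(serdeConstants.FIELD_DELIM, ",");
        LazySimpleSerDe serde = new LazySimpleSerDe();
        serde.initialize(new Configuration(), props);
        // doSerialize() insists on a STRUCT inspector, so describe the row as
        // a struct of (int, string) backed by plain Java objects.
        List<ObjectInspector> fieldOIs = Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        StructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("id", "name"), fieldOIs);
        Text line = (Text) serde.serialize(Arrays.<Object>asList(1, "alice"), rowOI);
        System.out.println(line);  // prints: 1,alice
    }
}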

Example 18 with AbstractSerDe

Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

From the class FileOutputFormatContainer, method getRecordWriter.

@Override
public RecordWriter<WritableComparable<?>, HCatRecord> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
    // This needs to be set manually; under normal circumstances the MR Task does it.
    setWorkOutputPath(context);
    // Configure the output key and value classes.
    // This is required for writing null as the key for file-based tables.
    context.getConfiguration().set("mapred.output.key.class", NullWritable.class.getName());
    String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
    OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
    StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo();
    HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), storeInfo);
    Class<? extends AbstractSerDe> serde = storageHandler.getSerDeClass();
    AbstractSerDe sd = (AbstractSerDe) ReflectionUtils.newInstance(serde, context.getConfiguration());
    context.getConfiguration().set("mapred.output.value.class", sd.getSerializedClass().getName());
    RecordWriter<WritableComparable<?>, HCatRecord> rw;
    if (HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed()) {
        // When Dynamic partitioning is used, the RecordWriter instance initialized here isn't used. Can use null.
        // (That's because records can't be written until the values of the dynamic partitions are deduced.
        // By that time, a new local instance of RecordWriter, with the correct output-path, will be constructed.)
        rw = new DynamicPartitionFileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter) null, context);
    } else {
        Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir"));
        Path childPath = new Path(parentDir,
            FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()),
                context.getConfiguration().get("mapreduce.output.basename", "part")));
        rw = new StaticPartitionFileRecordWriterContainer(
            getBaseOutputFormat().getRecordWriter(
                parentDir.getFileSystem(context.getConfiguration()),
                new JobConf(context.getConfiguration()),
                childPath.toString(),
                InternalUtil.createReporter(context)),
            context);
    }
    return rw;
}
Also used : Path(org.apache.hadoop.fs.Path) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) NullWritable(org.apache.hadoop.io.NullWritable) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) WritableComparable(org.apache.hadoop.io.WritableComparable) JobConf(org.apache.hadoop.mapred.JobConf) HCatRecord(org.apache.hive.hcatalog.data.HCatRecord)
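
For context, a hedged sketch of how a client job typically reaches this getRecordWriter, wired through HCatOutputFormat. The database and table names are placeholders, and the calls follow the HCatalog documentation rather than this file.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class HCatWriteJobSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "hcat-write");
        // A null partition-values map targets an unpartitioned table (dynamic
        // partitioning kicks in when the table is partitioned and no static
        // values are given).
        HCatOutputFormat.setOutput(job, OutputJobInfo.create("default", "mytable", null));
        HCatSchema schema = HCatOutputFormat.getTableSchema(job.getConfiguration());
        HCatOutputFormat.setSchema(job, schema);
        job.setOutputFormatClass(HCatOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(DefaultHCatRecord.class);
        // A Mapper would then emit (NullWritable, DefaultHCatRecord) pairs.
    }
}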

Example 19 with AbstractSerDe

Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

From the class FileRecordWriterContainer, method write.

@Override
public void write(WritableComparable<?> key, HCatRecord value) throws IOException, InterruptedException {
    LocalFileWriter localFileWriter = getLocalFileWriter(value);
    RecordWriter localWriter = localFileWriter.getLocalWriter();
    ObjectInspector localObjectInspector = localFileWriter.getLocalObjectInspector();
    AbstractSerDe localSerDe = localFileWriter.getLocalSerDe();
    OutputJobInfo localJobInfo = localFileWriter.getLocalJobInfo();
    for (Integer colToDel : partColsToDel) {
        value.remove(colToDel);
    }
    try {
        // The key given by the user is ignored; for Parquet we must supply null.
        Object keyToWrite = localWriter instanceof ParquetRecordWriterWrapper ? null : NullWritable.get();
        localWriter.write(keyToWrite, localSerDe.serialize(value.getAll(), localObjectInspector));
    } catch (SerDeException e) {
        throw new IOException("Failed to serialize object", e);
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) RecordWriter(org.apache.hadoop.mapred.RecordWriter) ParquetRecordWriterWrapper(org.apache.hadoop.hive.ql.io.parquet.write.ParquetRecordWriterWrapper) IOException(java.io.IOException) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
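
For orientation, a minimal sketch of the producing side: a map or reduce task builds an HCatRecord and emits it, and the framework eventually routes it into the write() method above. This is a fragment meant to live inside a Mapper or Reducer; field positions and values are illustrative.

import org.apache.hadoop.io.NullWritable;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;

// Inside a Mapper/Reducer whose output value class is DefaultHCatRecord:
HCatRecord record = new DefaultHCatRecord(2);
record.set(0, 42);       // first column, e.g. an int
record.set(1, "alice");  // second column, e.g. a string
// The emitted key is ignored by the writer (see the Parquet note above).
context.write(NullWritable.get(), record);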

Example 20 with AbstractSerDe

Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

From the class TestRegexSerDe, method testRegexSerDe.

/**
 * Test the RegexSerDe class.
 */
public void testRegexSerDe() throws Throwable {
    try {
        // Create the SerDe
        AbstractSerDe serDe = createSerDe(
            "host,identity,user,time,request,status,size,referer,agent",
            "string,string,string,string,string,string,string,string,string",
            "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") "
                + "([0-9]*) ([0-9]*) ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\")",
            "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s");
        // Data
        Text t = new Text("127.0.0.1 - - [26/May/2009:00:00:00 +0000] "
            + "\"GET /someurl/?track=Blabla(Main) HTTP/1.1\" 200 5864 - "
            + "\"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) "
            + "AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.65 Safari/525.19\"");
        // Deserialize
        Object row = serDe.deserialize(t);
        ObjectInspector rowOI = serDe.getObjectInspector();
        System.out.println("Deserialized row: " + row);
        // Serialize
        Text serialized = (Text) serDe.serialize(row, rowOI);
        assertEquals(t, serialized);
        // Do some changes (optional)
        ObjectInspector standardWritableRowOI = ObjectInspectorUtils.getStandardObjectInspector(rowOI, ObjectInspectorCopyOption.WRITABLE);
        Object standardWritableRow = ObjectInspectorUtils.copyToStandardObject(row, rowOI, ObjectInspectorCopyOption.WRITABLE);
        // Serialize
        serialized = (Text) serDe.serialize(standardWritableRow, standardWritableRowOI);
        assertEquals(t, serialized);
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Text(org.apache.hadoop.io.Text) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe)
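
The createSerDe helper is not shown in this snippet. Here is a plausible sketch, assuming the contrib RegexSerDe with its documented input.regex and output.format.string properties; the helper's name and exact wiring are assumptions.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.contrib.serde2.RegexSerDe;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;

// Hypothetical helper: a RegexSerDe deserializes by matching each line
// against input.regex and serializes by formatting with output.format.string.
private static AbstractSerDe createSerDe(String fieldNames, String fieldTypes,
        String inputRegex, String outputFormatString) throws SerDeException {
    Properties schema = new Properties();
    schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames);
    schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes);
    schema.setProperty("input.regex", inputRegex);
    schema.setProperty("output.format.string", outputFormatString);
    AbstractSerDe serde = new RegexSerDe();
    serde.initialize(new Configuration(), schema);
    return serde;
}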

Aggregations

AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe): 43 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 25 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 17 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 15 usages
ArrayList (java.util.ArrayList): 12 usages
Properties (java.util.Properties): 12 usages
BytesWritable (org.apache.hadoop.io.BytesWritable): 11 usages
IOException (java.io.IOException): 8 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 8 usages
Writable (org.apache.hadoop.io.Writable): 8 usages
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 7 usages
InputSplit (org.apache.hadoop.mapred.InputSplit): 7 usages
Test (org.junit.Test): 7 usages
AbstractPrimitiveLazyObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector): 6 usages
LazyBinaryMapObjectInspector (org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector): 6 usages
JavaBinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector): 6 usages
WritableBinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector): 6 usages
LinkedHashMap (java.util.LinkedHashMap): 5 usages
Path (org.apache.hadoop.fs.Path): 5 usages
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 5 usages