Search in sources :

Example 1 with ByteStream

use of org.apache.hadoop.hive.serde2.ByteStream in project hive by apache.

the class MapJoinKey method serializeVector.

/**
 * Serializes the key columns of one vectorized row.
 *
 * <p>For every key writer, the writer's object inspector is collected and the matching writable
 * key value is pulled from {@code keyWrapperBatch}; the collected values and inspectors are then
 * handed to {@code serializeRow}.
 *
 * @param byteStream Output to reuse. Can be null, in that case a new one would be created.
 * @param kw key wrapper the key values are read from
 * @param keyOutputWriters one writer per key column
 * @param keyWrapperBatch batch used to extract the writable value of each key column
 * @param nulls if non-null, entry {@code i} is set to whether key column {@code i} is null
 * @param sortableSortOrders passed through to {@code serializeRow}
 * @param nullMarkers passed through to {@code serializeRow}
 * @param notNullMarkers passed through to {@code serializeRow}
 * @return the result of {@code serializeRow}
 */
public static Output serializeVector(Output byteStream, VectorHashKeyWrapper kw, VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, boolean[] nulls, boolean[] sortableSortOrders, byte[] nullMarkers, byte[] notNullMarkers) throws HiveException, SerDeException {
    final int keyCount = keyOutputWriters.length;
    final Object[] keyValues = new Object[keyCount];
    final List<ObjectInspector> keyInspectors = new ArrayList<ObjectInspector>();
    final boolean trackNulls = (nulls != null);
    for (int col = 0; col < keyCount; ++col) {
        final VectorExpressionWriter colWriter = keyOutputWriters[col];
        keyInspectors.add(colWriter.getObjectInspector());
        // Going through the writable is roundabout; a faster path could fetch the raw key value
        // and serialize by Java type rather than by object inspector.
        final Object value = keyWrapperBatch.getWritableKeyValue(kw, col, colWriter);
        keyValues[col] = value;
        if (trackNulls) {
            nulls[col] = (value == null);
        }
    }
    return serializeRow(byteStream, keyValues, keyInspectors, sortableSortOrders, nullMarkers, notNullMarkers);
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) VectorExpressionWriter(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter)

Example 2 with ByteStream

use of org.apache.hadoop.hive.serde2.ByteStream in project hive by apache.

the class MapJoinKey method serializeVector.

/**
 * Serializes the key columns of one vectorized row.
 *
 * <p>For every key writer, the writer's object inspector is collected and the matching writable
 * key value is pulled from {@code keyWrapperBatch}; the collected values and inspectors are then
 * handed to {@code serializeRow}.
 *
 * @param byteStream Output to reuse. Can be null, in that case a new one would be created.
 * @param kw key wrapper the key values are read from
 * @param keyOutputWriters one writer per key column
 * @param keyWrapperBatch batch used to extract the writable value of each key column
 * @param nulls if non-null, entry {@code i} is set to whether key column {@code i} is null
 * @param sortableSortOrders passed through to {@code serializeRow}
 * @param nullMarkers passed through to {@code serializeRow}
 * @param notNullMarkers passed through to {@code serializeRow}
 * @return the result of {@code serializeRow}
 */
public static Output serializeVector(Output byteStream, VectorHashKeyWrapperBase kw, VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, boolean[] nulls, boolean[] sortableSortOrders, byte[] nullMarkers, byte[] notNullMarkers) throws HiveException, SerDeException {
    final int keyCount = keyOutputWriters.length;
    final Object[] keyValues = new Object[keyCount];
    final List<ObjectInspector> keyInspectors = new ArrayList<ObjectInspector>();
    final boolean trackNulls = (nulls != null);
    for (int col = 0; col < keyCount; ++col) {
        final VectorExpressionWriter colWriter = keyOutputWriters[col];
        keyInspectors.add(colWriter.getObjectInspector());
        // Going through the writable is roundabout; a faster path could fetch the raw key value
        // and serialize by Java type rather than by object inspector.
        final Object value = keyWrapperBatch.getWritableKeyValue(kw, col, colWriter);
        keyValues[col] = value;
        if (trackNulls) {
            nulls[col] = (value == null);
        }
    }
    return serializeRow(byteStream, keyValues, keyInspectors, sortableSortOrders, nullMarkers, notNullMarkers);
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) VectorExpressionWriter(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter)

Example 3 with ByteStream

use of org.apache.hadoop.hive.serde2.ByteStream in project hive by apache.

the class LazyUtils method writePrimitive.

/**
 * Writes the binary form of a primitive value to an output stream.
 *
 * <p>The value is read from {@code o} through {@code oi} and written via a
 * {@link DataOutputStream} wrapped around {@code out}; BINARY payloads are written directly to
 * {@code out}. The wrapper is always closed before returning, which closes {@code out} as well,
 * so callers are expected to pass a stream whose contents stay readable after close (the
 * original code is written with the unsynchronized {@code ByteStream.Output} in mind).
 *
 * @param out destination stream, used as the backing buffer for the {@code DataOutputStream}
 * @param o the primitive object to write
 * @param oi the inspector used to read {@code o}; its primitive category selects the encoding
 * @throws IOException on error during the write operation
 * @throws RuntimeException if the primitive category is not one of the handled categories
 */
public static void writePrimitive(OutputStream out, Object o, PrimitiveObjectInspector oi) throws IOException {
    final DataOutputStream dataOut = new DataOutputStream(out);
    try {
        switch(oi.getPrimitiveCategory()) {
            case BOOLEAN:
                dataOut.writeBoolean(((BooleanObjectInspector) oi).get(o));
                break;
            case BYTE:
                dataOut.writeByte(((ByteObjectInspector) oi).get(o));
                break;
            case SHORT:
                dataOut.writeShort(((ShortObjectInspector) oi).get(o));
                break;
            case INT:
                dataOut.writeInt(((IntObjectInspector) oi).get(o));
                break;
            case LONG:
                dataOut.writeLong(((LongObjectInspector) oi).get(o));
                break;
            case FLOAT:
                dataOut.writeFloat(((FloatObjectInspector) oi).get(o));
                break;
            case DOUBLE:
                dataOut.writeDouble(((DoubleObjectInspector) oi).get(o));
                break;
            case BINARY:
                {
                    // Only the first getLength() bytes of the writable's array are written.
                    final BytesWritable payload = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
                    out.write(payload.getBytes(), 0, payload.getLength());
                    break;
                }
            case DECIMAL:
                {
                    final HiveDecimalWritable decimal = ((HiveDecimalObjectInspector) oi).getPrimitiveWritableObject(o);
                    decimal.write(dataOut);
                    break;
                }
            default:
                throw new RuntimeException("Hive internal error.");
        }
    } finally {
        // Closing the wrapper also closes the underlying stream; for a ByteStream this should
        // have no effect and the written data should remain accessible.
        dataOut.close();
    }
}
Also used : LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) DataOutputStream(java.io.DataOutputStream) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector)

Example 4 with ByteStream

use of org.apache.hadoop.hive.serde2.ByteStream in project elephant-bird by twitter.

the class RCFilePigStorage method putNext.

/**
 * Writes one tuple as a row: each field is rendered as text into a shared byte buffer and each
 * column reference is pointed at its slice of that buffer before the row is handed to the writer.
 *
 * <p>Row state ({@code byteStream}, {@code rowWritable}, {@code colValRefs}) is created lazily on
 * the first call and reused afterwards. Fields beyond {@code numColumns} are ignored; columns the
 * tuple does not supply are written as zero-length values.
 *
 * @param t the tuple to write
 * @throws IOException if the number of columns has not been set, if writing fails, or if the
 *         write is interrupted (the thread's interrupt flag is restored in that case)
 */
@SuppressWarnings("unchecked")
@Override
public void putNext(Tuple t) throws IOException {
    if (rowWritable == null) {
        // initialize
        if (numColumns < 1) {
            throw new IOException("number of columns is not set");
        }
        byteStream = new ByteStream.Output();
        rowWritable = new BytesRefArrayWritable();
        colValRefs = new BytesRefWritable[numColumns];
        for (int i = 0; i < numColumns; i++) {
            colValRefs[i] = new BytesRefWritable();
            rowWritable.set(i, colValRefs[i]);
        }
    }
    byteStream.reset();
    // write each field as a text (just like PigStorage)
    int written = Math.min(t.size(), numColumns);
    int startPos = 0;
    for (int i = 0; i < written; i++) {
        StorageUtil.putField(byteStream, t.get(i));
        colValRefs[i].set(byteStream.getData(), startPos, byteStream.getCount() - startPos);
        startPos = byteStream.getCount();
    }
    // The column refs are reused across rows. Without this, a tuple shorter than numColumns
    // would leave the trailing refs pointing at the previous row's offsets in a buffer that has
    // since been reset and overwritten, so stale bytes would be written for those columns.
    for (int i = written; i < numColumns; i++) {
        colValRefs[i].set(byteStream.getData(), startPos, 0);
    }
    try {
        writer.write(null, rowWritable);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still observe it.
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }
}
Also used : BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) ByteStream(org.apache.hadoop.hive.serde2.ByteStream) IOException(java.io.IOException) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)

Example 5 with ByteStream

use of org.apache.hadoop.hive.serde2.ByteStream in project hive by apache.

the class LazyBinarySerDe method serializeStruct.

/**
 * Serializes a struct by unpacking its fields and delegating to the array-based
 * {@code serializeStruct} overload.
 *
 * <p>A null struct produces no output. Otherwise the data and the object inspector of every
 * field reported by {@code soi} are collected, in field order, and passed on together.
 *
 * @param byteStream output the struct is serialized into
 * @param obj the struct object; may be null
 * @param soi inspector used to enumerate the fields of {@code obj} and read their data
 * @param warnedOnceNullMapKey passed through to the delegate overload
 * @throws SerDeException if the delegate overload fails
 */
private static void serializeStruct(RandomAccessOutput byteStream, Object obj, StructObjectInspector soi, BooleanRef warnedOnceNullMapKey) throws SerDeException {
    if (obj == null) {
        // Nothing is written for a null struct.
        return;
    }
    final List<? extends StructField> structFields = soi.getAllStructFieldRefs();
    final int fieldCount = structFields.size();
    final Object[] values = new Object[fieldCount];
    final List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(fieldCount);
    for (int idx = 0; idx < fieldCount; ++idx) {
        final StructField current = structFields.get(idx);
        values[idx] = soi.getStructFieldData(obj, current);
        inspectors.add(current.getFieldObjectInspector());
    }
    serializeStruct(byteStream, values, inspectors, warnedOnceNullMapKey);
}
Also used : UnionObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) HiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalYearMonthObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) HiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveIntervalDayTimeObjectInspector) DateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) TimestampLocalTZObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector) HiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector) HiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) 
BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ArrayList(java.util.ArrayList)

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)4 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)4 ArrayList (java.util.ArrayList)3 BooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector)3 ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector)3 DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector)3 FloatObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector)3 IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector)3 LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector)3 ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector)3 VectorExpressionWriter (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter)2 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)2 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)2 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)2 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)2 UnionObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector)2 BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector)2 DateObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector)2 HiveCharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector)2 HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector)2