use of org.apache.hadoop.hive.serde2.ByteStream in project hive by apache.
the class MapJoinKey method serializeVector.
/**
 * Serializes the row to output for the vectorized path.
 * @param byteStream Output to reuse. May be null; in that case, a new one is created.
 */
public static Output serializeVector(Output byteStream, VectorHashKeyWrapperBase kw,
    VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch,
    boolean[] nulls, boolean[] sortableSortOrders, byte[] nullMarkers,
    byte[] notNullMarkers) throws HiveException, SerDeException {
  Object[] fieldData = new Object[keyOutputWriters.length];
  List<ObjectInspector> fieldOis = new ArrayList<ObjectInspector>();
  for (int i = 0; i < keyOutputWriters.length; ++i) {
    VectorExpressionWriter writer = keyOutputWriters[i];
    fieldOis.add(writer.getObjectInspector());
    // This is rather convoluted... to simplify for perf, we could call getRawKeyValue
    // instead of writable, and serialize based on Java type as opposed to OI.
    fieldData[i] = keyWrapperBatch.getWritableKeyValue(kw, i, writer);
    if (nulls != null) {
      nulls[i] = (fieldData[i] == null);
    }
  }
  return serializeRow(byteStream, fieldData, fieldOis, sortableSortOrders, nullMarkers, notNullMarkers);
}
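The byteStream parameter follows a reuse-or-create contract: the caller keeps the returned Output and passes it back on the next call, so steady-state serialization allocates no per-row buffers. Below is a minimal sketch of that contract only; serializeFields is a hypothetical stand-in for the serializeRow call above, not Hive's actual method.

import org.apache.hadoop.hive.serde2.ByteStream.Output;

// Sketch of the reuse-or-create contract; serializeFields is hypothetical.
public class OutputReuseSketch {

  static Output serializeFields(Output byteStream, Object[] fieldData) {
    // Reuse the caller's Output when given one, otherwise create a fresh one,
    // mirroring the javadoc of serializeVector above.
    Output output = (byteStream == null) ? new Output() : byteStream;
    output.reset(); // rewinds the count; the backing byte[] is kept and grows as needed
    for (Object field : fieldData) {
      // ... write each field's bytes into output ...
    }
    return output;
  }

  public static void main(String[] args) {
    Output reused = null;
    for (int row = 0; row < 3; row++) {
      // After the first iteration, the same backing buffer is recycled.
      reused = serializeFields(reused, new Object[] { row, "x" + row });
    }
  }
}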
use of org.apache.hadoop.hive.serde2.ByteStream in project hive by apache.
the class LazyUtils method writePrimitive.
/**
 * Write out a binary representation of a PrimitiveObject to a byte stream.
 *
 * @param out ByteStream.Output, an unsynchronized version of ByteArrayOutputStream, used as a
 *          backing buffer for the DataOutputStream
 * @param o the PrimitiveObject
 * @param oi the PrimitiveObjectInspector
 * @throws IOException on error during the write operation
 */
public static void writePrimitive(OutputStream out, Object o, PrimitiveObjectInspector oi) throws IOException {
  DataOutputStream dos = new DataOutputStream(out);
  try {
    switch (oi.getPrimitiveCategory()) {
      case BOOLEAN:
        boolean b = ((BooleanObjectInspector) oi).get(o);
        dos.writeBoolean(b);
        break;
      case BYTE:
        byte bt = ((ByteObjectInspector) oi).get(o);
        dos.writeByte(bt);
        break;
      case SHORT:
        short s = ((ShortObjectInspector) oi).get(o);
        dos.writeShort(s);
        break;
      case INT:
        int i = ((IntObjectInspector) oi).get(o);
        dos.writeInt(i);
        break;
      case LONG:
        long l = ((LongObjectInspector) oi).get(o);
        dos.writeLong(l);
        break;
      case FLOAT:
        float f = ((FloatObjectInspector) oi).get(o);
        dos.writeFloat(f);
        break;
      case DOUBLE:
        double d = ((DoubleObjectInspector) oi).get(o);
        dos.writeDouble(d);
        break;
      case BINARY: {
        BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
        out.write(bw.getBytes(), 0, bw.getLength());
        break;
      }
      case DECIMAL: {
        HiveDecimalWritable hdw = ((HiveDecimalObjectInspector) oi).getPrimitiveWritableObject(o);
        hdw.write(dos);
        break;
      }
      default:
        throw new RuntimeException("Hive internal error.");
    }
  } finally {
    // closing the underlying ByteStream should have no effect, the data should still be
    // accessible
    dos.close();
  }
}
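As a rough usage sketch, assuming Hive's PrimitiveObjectInspectorFactory and ByteStream.Output exposing getData()/getLength() as in recent Hive versions: the INT branch goes through DataOutputStream.writeInt, which emits four big-endian bytes.

import java.io.IOException;
import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;

public class WritePrimitiveSketch {
  public static void main(String[] args) throws IOException {
    ByteStream.Output out = new ByteStream.Output();
    // INT is written via DataOutputStream.writeInt: 4 bytes, big-endian.
    LazyUtils.writePrimitive(out, new IntWritable(42),
        PrimitiveObjectInspectorFactory.writableIntObjectInspector);
    System.out.println(out.getLength());           // 4
    System.out.printf("0x%02x%n", out.getData()[3]); // 0x2a, the low byte of 42
  }
}

Note that closing the DataOutputStream in the finally block is harmless here: a byte-array-backed stream like ByteStream.Output releases nothing on close(), so the written bytes stay accessible, which is exactly what the comment in the finally block relies on.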
use of org.apache.hadoop.hive.serde2.ByteStream in project elephant-bird by twitter.
the class RCFilePigStorage method putNext.
@SuppressWarnings("unchecked")
@Override
public void putNext(Tuple t) throws IOException {
  if (rowWritable == null) {
    // initialize
    if (numColumns < 1) {
      throw new IOException("number of columns is not set");
    }
    byteStream = new ByteStream.Output();
    rowWritable = new BytesRefArrayWritable();
    colValRefs = new BytesRefWritable[numColumns];
    for (int i = 0; i < numColumns; i++) {
      colValRefs[i] = new BytesRefWritable();
      rowWritable.set(i, colValRefs[i]);
    }
  }
  byteStream.reset();
  // write each field as a text (just like PigStorage)
  int sz = t.size();
  int startPos = 0;
  for (int i = 0; i < sz && i < numColumns; i++) {
    StorageUtil.putField(byteStream, t.get(i));
    colValRefs[i].set(byteStream.getData(), startPos, byteStream.getCount() - startPos);
    startPos = byteStream.getCount();
  }
  try {
    writer.write(null, rowWritable);
  } catch (InterruptedException e) {
    throw new IOException(e);
  }
}
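The pattern worth noting here is the zero-copy slicing: every column of a row is written into one reused buffer, and each BytesRefWritable records only an (offset, length) window into it. A stripped-down sketch of that slicing, assuming the getData()/getCount() accessors used in the snippet above (newer Hive versions name the length accessor getLength()), with illustrative field values:

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public class RowSliceSketch {
  public static void main(String[] args) throws IOException {
    ByteStream.Output byteStream = new ByteStream.Output();
    String[] fields = { "alice", "42" }; // stand-in for the Pig tuple's fields

    BytesRefArrayWritable rowWritable = new BytesRefArrayWritable(fields.length);
    int startPos = 0;
    for (int i = 0; i < fields.length; i++) {
      byte[] bytes = fields[i].getBytes(StandardCharsets.UTF_8);
      byteStream.write(bytes, 0, bytes.length);
      // Record only the (offset, length) window; no per-field copy is made.
      BytesRefWritable ref = new BytesRefWritable();
      ref.set(byteStream.getData(), startPos, byteStream.getCount() - startPos);
      rowWritable.set(i, ref);
      startPos = byteStream.getCount();
    }
  }
}

If the buffer grows while a later field is written, earlier refs keep pointing at the old array; that still appears safe, because ByteArrayOutputStream-style growth copies bytes into a new array and never mutates the already-written prefix of the old one.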
use of org.apache.hadoop.hive.serde2.ByteStream in project hive by apache.
the class LazyBinarySerDe method serializeStruct.
private static void serializeStruct(RandomAccessOutput byteStream, Object obj,
    StructObjectInspector soi, BooleanRef warnedOnceNullMapKey) throws SerDeException {
  // do nothing for null struct
  if (null == obj) {
    return;
  }
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  int size = fields.size();
  Object[] fieldData = new Object[size];
  List<ObjectInspector> fieldOis = new ArrayList<ObjectInspector>(size);
  for (int i = 0; i < size; ++i) {
    StructField field = fields.get(i);
    fieldData[i] = soi.getStructFieldData(obj, field);
    fieldOis.add(field.getFieldObjectInspector());
  }
  serializeStruct(byteStream, fieldData, fieldOis, warnedOnceNullMapKey);
}
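For context, here is a small driver sketch of the kind of (object, inspector) pair this method consumes: a standard struct OI over two primitive fields, with the row represented as a List. The field names and values are illustrative only, and the gather loop mirrors the one in serializeStruct above.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class StructGatherSketch {
  public static void main(String[] args) {
    List<ObjectInspector> fieldOis = new ArrayList<ObjectInspector>();
    fieldOis.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    fieldOis.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("id", "name"), fieldOis);

    Object row = Arrays.asList(42, "alice");
    // The same gather step as serializeStruct: pull each field out via the OI.
    for (StructField field : soi.getAllStructFieldRefs()) {
      System.out.println(field.getFieldName() + " = " + soi.getStructFieldData(row, field));
    }
  }
}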