Use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.
From the class AccumuloRowSerializer, the method serialize:
public Mutation serialize(Object obj, ObjectInspector objInspector) throws SerDeException, IOException {
    if (objInspector.getCategory() != ObjectInspector.Category.STRUCT) {
        throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName());
    }
    // Prepare the field ObjectInspectors
    StructObjectInspector soi = (StructObjectInspector) objInspector;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    List<Object> columnValues = soi.getStructFieldsDataAsList(obj);
    // Fail if we try to access an offset out of bounds
    if (rowIdOffset >= fields.size()) {
        throw new IllegalStateException("Attempted to access field outside of definition for struct. Have " + fields.size() + " fields and tried to access offset " + rowIdOffset);
    }
    StructField field = fields.get(rowIdOffset);
    Object value = columnValues.get(rowIdOffset);
    // The ObjectInspector for the row ID
    ObjectInspector fieldObjectInspector = field.getFieldObjectInspector();
    // Serialize the row component using the RowIdFactory. In the normal case, this will just
    // delegate back to the "local" serializeRowId method
    byte[] data = rowIdFactory.serializeRowId(value, field, output);
    // Set that as the row id in the mutation
    Mutation mutation = new Mutation(data);
    // Each column in the row
    for (int i = 0; i < fields.size(); i++) {
        if (rowIdOffset == i) {
            continue;
        }
        // Get the relevant information for this column
        field = fields.get(i);
        value = columnValues.get(i);
        // Despite having a fixed schema from Hive, we have sparse columns in Accumulo
        if (null == value) {
            continue;
        }
        // The ObjectInspector for the current column
        fieldObjectInspector = field.getFieldObjectInspector();
        // Make sure we got the right implementation of a ColumnMapping
        ColumnMapping mapping = mappings.get(i);
        if (mapping instanceof HiveAccumuloColumnMapping) {
            serializeColumnMapping((HiveAccumuloColumnMapping) mapping, fieldObjectInspector, value, mutation);
        } else if (mapping instanceof HiveAccumuloMapColumnMapping) {
            serializeColumnMapping((HiveAccumuloMapColumnMapping) mapping, fieldObjectInspector, value, mutation);
        } else {
            throw new IllegalArgumentException("Mapping for " + field.getFieldName() + " was not a HiveColumnMapping, but was " + mapping.getClass());
        }
    }
    return mutation;
}
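The serializeRowId call above writes the row-id bytes through the shared ByteStream.Output member rather than allocating a fresh stream per row. A minimal sketch of that reset-write-copy pattern, assuming a string row id and a hypothetical helper name (this is not Hive's actual RowIdFactory code):

import org.apache.hadoop.hive.serde2.ByteStream.Output;
import java.nio.charset.StandardCharsets;

// Hypothetical helper illustrating buffer reuse with ByteStream.Output.
static byte[] toRowIdBytes(String rowId, Output output) {
    // Reuse the backing array instead of allocating a new stream per row.
    output.reset();
    byte[] utf8 = rowId.getBytes(StandardCharsets.UTF_8);
    output.write(utf8, 0, utf8.length);
    // getData() exposes the whole backing array; copy only the valid prefix.
    byte[] data = new byte[output.getLength()];
    System.arraycopy(output.getData(), 0, data, 0, output.getLength());
    return data;
}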
Use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.
From the class RCFileGenerator, the method genData:
private static void genData(String format, int numRows, String output, String plainOutput) throws Exception {
    int numFields = 0;
    if (format.equals("student")) {
        rand = new Random(numRows);
        numFields = 3;
    } else if (format.equals("voter")) {
        rand = new Random(1000000000 + numRows);
        numFields = 4;
    } else if (format.equals("alltypes")) {
        rand = new Random(2000000000L + numRows);
        // Must match the number of entries generated below (nine columns).
        numFields = 9;
    }
    RCFileOutputFormat.setColumnNumber(conf, numFields);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, getFile(output), null, new DefaultCodec());
    PrintWriter pw = new PrintWriter(new FileWriter(plainOutput));
    for (int j = 0; j < numRows; j++) {
        BytesRefArrayWritable row = new BytesRefArrayWritable(numFields);
        byte[][] fields = null;
        if (format.equals("student")) {
            byte[][] f = {
                randomName().getBytes("UTF-8"),
                Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
                Double.valueOf(randomGpa()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("voter")) {
            byte[][] f = {
                randomName().getBytes("UTF-8"),
                Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
                randomRegistration().getBytes("UTF-8"),
                Double.valueOf(randomContribution()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("alltypes")) {
            byte[][] f = {
                Integer.valueOf(rand.nextInt(Byte.MAX_VALUE)).toString().getBytes("UTF-8"),
                Integer.valueOf(rand.nextInt(Short.MAX_VALUE)).toString().getBytes("UTF-8"),
                Integer.valueOf(rand.nextInt()).toString().getBytes("UTF-8"),
                Long.valueOf(rand.nextLong()).toString().getBytes("UTF-8"),
                Float.valueOf(rand.nextFloat() * 1000).toString().getBytes("UTF-8"),
                Double.valueOf(rand.nextDouble() * 1000000).toString().getBytes("UTF-8"),
                randomName().getBytes("UTF-8"),
                randomMap(),
                randomArray() };
            fields = f;
        }
        for (int i = 0; i < fields.length; i++) {
            BytesRefWritable field = new BytesRefWritable(fields[i], 0, fields[i].length);
            row.set(i, field);
            // The fields were encoded as UTF-8, so decode them the same way.
            pw.print(new String(fields[i], "UTF-8"));
            if (i != fields.length - 1) {
                pw.print("\t");
            } else {
                pw.println();
            }
        }
        writer.append(row);
    }
    writer.close();
    pw.close();
}
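A hedged sketch of how this generator might be driven; the entry point, row counts, and file names here are assumptions for illustration, not part of the Hive test class:

public static void main(String[] args) throws Exception {
    // Write 100 rows per format: an RCFile plus a tab-delimited
    // plain-text copy that tests can diff against.
    genData("student", 100, "student.rc", "student.plain");
    genData("voter", 100, "voter.rc", "voter.plain");
    genData("alltypes", 100, "alltypes.rc", "alltypes.plain");
}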
Use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.
From the class MapJoinKey, the method read:
@SuppressWarnings("deprecation")
public static MapJoinKey read(Output output, MapJoinObjectSerDeContext context, Writable writable) throws SerDeException, HiveException {
    AbstractSerDe serde = context.getSerDe();
    Object obj = serde.deserialize(writable);
    MapJoinKeyObject result = new MapJoinKeyObject();
    result.read(serde.getObjectInspector(), obj);
    return result;
}
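Note that the Output parameter is not used on this deserialization path. A hypothetical call site (keyContext and writable are assumed to come from the join's hash-table setup):

// Rehydrate a join key previously serialized into 'writable'.
Output unusedBuffer = new Output();
MapJoinKey key = MapJoinKey.read(unusedBuffer, keyContext, writable);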
Use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.
From the class MapJoinKey, the method serializeVector:
/**
* Serializes row to output for vectorized path.
* @param byteStream Output to reuse. Can be null, in that case a new one would be created.
*/
public static Output serializeVector(Output byteStream, VectorHashKeyWrapperBase kw,
    VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch,
    boolean[] nulls, boolean[] sortableSortOrders, byte[] nullMarkers,
    byte[] notNullMarkers) throws HiveException, SerDeException {
    Object[] fieldData = new Object[keyOutputWriters.length];
    List<ObjectInspector> fieldOis = new ArrayList<ObjectInspector>();
    for (int i = 0; i < keyOutputWriters.length; ++i) {
        VectorExpressionWriter writer = keyOutputWriters[i];
        fieldOis.add(writer.getObjectInspector());
        // This is rather convoluted... to simplify for perf, we could call getRawKeyValue
        // instead of writable, and serialize based on Java type as opposed to OI.
        fieldData[i] = keyWrapperBatch.getWritableKeyValue(kw, i, writer);
        if (nulls != null) {
            nulls[i] = (fieldData[i] == null);
        }
    }
    return serializeRow(byteStream, fieldData, fieldOis, sortableSortOrders, nullMarkers, notNullMarkers);
}
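Per the javadoc, the Output can be reused across calls. A hedged sketch of a caller's loop (the keys collection and the surrounding arrays are assumptions):

// Reuse one buffer across all keys; the first call allocates it.
Output buffer = null;
for (VectorHashKeyWrapperBase kw : keys) {
    buffer = MapJoinKey.serializeVector(buffer, kw, keyOutputWriters,
        keyWrapperBatch, nulls, sortableSortOrders, nullMarkers, notNullMarkers);
    // Consume buffer.getData() / buffer.getLength() before the next iteration.
}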
Use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.
From the class VectorizationContext, the method getImplicitCastExpression:
/**
* The GenericUDFs might need their children's output to be cast to the given castType.
* This method returns a cast expression that would achieve the required casting.
*/
private ExprNodeDesc getImplicitCastExpression(GenericUDF udf, ExprNodeDesc child, TypeInfo castType) throws HiveException {
    TypeInfo inputTypeInfo = child.getTypeInfo();
    String inputTypeString = inputTypeInfo.getTypeName();
    String castTypeString = castType.getTypeName();
    if (inputTypeString.equals(castTypeString)) {
        // Nothing to be done
        return null;
    }
    boolean inputTypeDecimal = false;
    boolean castTypeDecimal = false;
    if (decimalTypePattern.matcher(inputTypeString).matches()) {
        inputTypeDecimal = true;
    }
    if (decimalTypePattern.matcher(castTypeString).matches()) {
        castTypeDecimal = true;
    }
    if (castTypeDecimal && !inputTypeDecimal) {
        if (needsImplicitCastForDecimal(udf)) {
            // Cast the input to decimal. If castType is decimal, try not to lose
            // precision for numeric types.
            castType = updatePrecision(inputTypeInfo, (DecimalTypeInfo) castType);
            GenericUDFToDecimal castToDecimalUDF = new GenericUDFToDecimal();
            castToDecimalUDF.setTypeInfo(castType);
            List<ExprNodeDesc> children = new ArrayList<>();
            children.add(child);
            return new ExprNodeGenericFuncDesc(castType, castToDecimalUDF, children);
        }
    } else if (!castTypeDecimal && inputTypeDecimal) {
        if (needsImplicitCastForDecimal(udf)) {
            // Cast decimal input to returnType
            GenericUDF genericUdf = getGenericUDFForCast(castType);
            List<ExprNodeDesc> children = new ArrayList<>();
            children.add(child);
            return new ExprNodeGenericFuncDesc(castType, genericUdf, children);
        }
    } else {
        // Casts to exact types including long to double etc. are needed in some special cases.
        if (udf instanceof GenericUDFCoalesce || udf instanceof GenericUDFElt || udf instanceof GenericUDFIf) {
            GenericUDF genericUdf = getGenericUDFForCast(castType);
            List<ExprNodeDesc> children = new ArrayList<>();
            children.add(child);
            return new ExprNodeGenericFuncDesc(castType, genericUdf, children);
        }
    }
    return null;
}
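For instance, COALESCE over an int column and a double column can require castType double for the int child, which lands in the final branch above. A hedged sketch of the descriptor that branch effectively builds (the column names and the call into the private getGenericUDFForCast are illustrative only):

// Hypothetical: wrap an int column in an explicit cast to double.
ExprNodeDesc intCol = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "c1", "t", false);
GenericUDF toDouble = getGenericUDFForCast(TypeInfoFactory.doubleTypeInfo);
List<ExprNodeDesc> children = new ArrayList<>();
children.add(intCol);
ExprNodeDesc cast = new ExprNodeGenericFuncDesc(TypeInfoFactory.doubleTypeInfo, toDouble, children);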