
Example 26 with Output

Use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

From class AccumuloRowSerializer, method serialize.

public Mutation serialize(Object obj, ObjectInspector objInspector) throws SerDeException, IOException {
    if (objInspector.getCategory() != ObjectInspector.Category.STRUCT) {
        throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName());
    }
    // Prepare the field ObjectInspectors
    StructObjectInspector soi = (StructObjectInspector) objInspector;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    List<Object> columnValues = soi.getStructFieldsDataAsList(obj);
    // Fail if we try to access an offset out of bounds
    if (rowIdOffset >= fields.size()) {
        throw new IllegalStateException("Attempted to access field outside of definition for struct. Have " + fields.size() + " fields and tried to access offset " + rowIdOffset);
    }
    StructField field = fields.get(rowIdOffset);
    Object value = columnValues.get(rowIdOffset);
    // The ObjectInspector for the row ID
    ObjectInspector fieldObjectInspector = field.getFieldObjectInspector();
    // Serialize the row component using the RowIdFactory. In the normal case, this will just
    // delegate back to the "local" serializeRowId method
    byte[] data = rowIdFactory.serializeRowId(value, field, output);
    // Set that as the row id in the mutation
    Mutation mutation = new Mutation(data);
    // Each column in the row
    for (int i = 0; i < fields.size(); i++) {
        if (rowIdOffset == i) {
            continue;
        }
        // Get the relevant information for this column
        field = fields.get(i);
        value = columnValues.get(i);
        // Despite having a fixed schema from Hive, we have sparse columns in Accumulo
        if (null == value) {
            continue;
        }
        // The ObjectInspector for the current column
        fieldObjectInspector = field.getFieldObjectInspector();
        // Make sure we got the right implementation of a ColumnMapping
        ColumnMapping mapping = mappings.get(i);
        if (mapping instanceof HiveAccumuloColumnMapping) {
            serializeColumnMapping((HiveAccumuloColumnMapping) mapping, fieldObjectInspector, value, mutation);
        } else if (mapping instanceof HiveAccumuloMapColumnMapping) {
            serializeColumnMapping((HiveAccumuloMapColumnMapping) mapping, fieldObjectInspector, value, mutation);
        } else {
            throw new IllegalArgumentException("Mapping for " + field.getFieldName() + " was not a HiveColumnMapping, but was " + mapping.getClass());
        }
    }
    return mutation;
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Mutation(org.apache.accumulo.core.data.Mutation) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) ColumnMapping(org.apache.hadoop.hive.accumulo.columns.ColumnMapping)
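To make the sparse-column handling above concrete, here is a minimal sketch (not part of the Hive sources) that builds an Accumulo Mutation from pre-serialized column bytes, skipping null values exactly as the loop in serialize does. The method name and parameters are illustrative assumptions.

// Illustrative sketch only; assumes org.apache.accumulo.core.data.Mutation
// and that all column values have already been serialized to byte[].
static Mutation sketchSparseMutation(byte[] rowId, byte[][] families, byte[][] qualifiers, byte[][] values) {
    Mutation mutation = new Mutation(rowId);
    for (int i = 0; i < values.length; i++) {
        if (values[i] == null) {
            // Sparse semantics: a null Hive value produces no Accumulo entry
            continue;
        }
        mutation.put(families[i], qualifiers[i], values[i]);
    }
    return mutation;
}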

Example 27 with Output

Use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

From class RCFileGenerator, method genData.

private static void genData(String format, int numRows, String output, String plainOutput) throws Exception {
    int numFields = 0;
    if (format.equals("student")) {
        rand = new Random(numRows);
        numFields = 3;
    } else if (format.equals("voter")) {
        rand = new Random(1000000000 + numRows);
        numFields = 4;
    } else if (format.equals("alltypes")) {
        rand = new Random(2000000000L + numRows);
        numFields = 10;
    }
    RCFileOutputFormat.setColumnNumber(conf, numFields);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, getFile(output), null, new DefaultCodec());
    PrintWriter pw = new PrintWriter(new FileWriter(plainOutput));
    for (int j = 0; j < numRows; j++) {
        BytesRefArrayWritable row = new BytesRefArrayWritable(numFields);
        byte[][] fields = null;
        if (format.equals("student")) {
            byte[][] f = { randomName().getBytes("UTF-8"), Integer.valueOf(randomAge()).toString().getBytes("UTF-8"), Double.valueOf(randomGpa()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("voter")) {
            byte[][] f = { randomName().getBytes("UTF-8"), Integer.valueOf(randomAge()).toString().getBytes("UTF-8"), randomRegistration().getBytes("UTF-8"), Double.valueOf(randomContribution()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("alltypes")) {
            byte[][] f = { Integer.valueOf(rand.nextInt(Byte.MAX_VALUE)).toString().getBytes("UTF-8"), Integer.valueOf(rand.nextInt(Short.MAX_VALUE)).toString().getBytes("UTF-8"), Integer.valueOf(rand.nextInt()).toString().getBytes("UTF-8"), Long.valueOf(rand.nextLong()).toString().getBytes("UTF-8"), Float.valueOf(rand.nextFloat() * 1000).toString().getBytes("UTF-8"), Double.valueOf(rand.nextDouble() * 1000000).toString().getBytes("UTF-8"), randomName().getBytes("UTF-8"), randomMap(), randomArray() };
            fields = f;
        }
        for (int i = 0; i < fields.length; i++) {
            BytesRefWritable field = new BytesRefWritable(fields[i], 0, fields[i].length);
            row.set(i, field);
            pw.print(new String(fields[i]));
            if (i != fields.length - 1)
                pw.print("\t");
            else
                pw.println();
        }
        writer.append(row);
    }
    writer.close();
    pw.close();
}
Also used : RCFile(org.apache.hadoop.hive.ql.io.RCFile) Random(java.util.Random) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) FileWriter(java.io.FileWriter) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) PrintWriter(java.io.PrintWriter) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
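As a rough counterpart to genData, the sketch below reads the generated file back with RCFile.Reader; it assumes the same static fs, conf, and getFile helpers used above and only iterates the raw column bytes.

// Sketch: read rows back from the RCFile written by genData above.
// Assumes the class's fs, conf and getFile(...) members; also needs
// org.apache.hadoop.io.LongWritable.
private static void readData(String output) throws Exception {
    RCFile.Reader reader = new RCFile.Reader(fs, getFile(output), conf);
    LongWritable rowId = new LongWritable();
    BytesRefArrayWritable row = new BytesRefArrayWritable();
    while (reader.next(rowId)) {
        reader.getCurrentRow(row);
        for (int i = 0; i < row.size(); i++) {
            BytesRefWritable field = row.get(i);
            // field.getData(), field.getStart(), field.getLength() expose the column bytes
        }
    }
    reader.close();
}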

Example 28 with Output

Use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

From class MapJoinKey, method read.

@SuppressWarnings("deprecation")
public static MapJoinKey read(Output output, MapJoinObjectSerDeContext context, Writable writable) throws SerDeException, HiveException {
    AbstractSerDe serde = context.getSerDe();
    Object obj = serde.deserialize(writable);
    MapJoinKeyObject result = new MapJoinKeyObject();
    result.read(serde.getObjectInspector(), obj);
    return result;
}
Also used : AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe)
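For reference, a small hedged wrapper showing how read might be invoked; the context and writable are left as parameters because obtaining them depends on the surrounding map-join setup, and the Output buffer is not consumed by this overload.

// Sketch only: the Output passed here is not referenced by read() above.
static MapJoinKey readKey(MapJoinObjectSerDeContext context, Writable keyWritable)
        throws SerDeException, HiveException {
    Output unusedBuffer = new Output();
    return MapJoinKey.read(unusedBuffer, context, keyWritable);
}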

Example 29 with Output

Use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

From class MapJoinKey, method serializeVector.

/**
 * Serializes row to output for vectorized path.
 * @param byteStream Output to reuse. Can be null, in which case a new one will be created.
 */
public static Output serializeVector(Output byteStream, VectorHashKeyWrapperBase kw, VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, boolean[] nulls, boolean[] sortableSortOrders, byte[] nullMarkers, byte[] notNullMarkers) throws HiveException, SerDeException {
    Object[] fieldData = new Object[keyOutputWriters.length];
    List<ObjectInspector> fieldOis = new ArrayList<ObjectInspector>();
    for (int i = 0; i < keyOutputWriters.length; ++i) {
        VectorExpressionWriter writer = keyOutputWriters[i];
        fieldOis.add(writer.getObjectInspector());
        // This is rather convoluted... to simplify for perf, we could call getRawKeyValue
        // instead of writable, and serialize based on Java type as opposed to OI.
        fieldData[i] = keyWrapperBatch.getWritableKeyValue(kw, i, writer);
        if (nulls != null) {
            nulls[i] = (fieldData[i] == null);
        }
    }
    return serializeRow(byteStream, fieldData, fieldOis, sortableSortOrders, nullMarkers, notNullMarkers);
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) VectorExpressionWriter(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter)
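A hedged sketch of how the reusable Output buffer might flow across calls on the vectorized path; the key wrappers, writers, batch, and marker arrays are assumed to come from the vectorized group-by/map-join setup and are not constructed here.

// Sketch only: reuse one Output across keys; per the javadoc above,
// passing null makes serializeVector create a fresh buffer.
static Output serializeAllKeys(VectorHashKeyWrapperBase[] keys, VectorExpressionWriter[] writers,
        VectorHashKeyWrapperBatch batch, boolean[] sortableSortOrders,
        byte[] nullMarkers, byte[] notNullMarkers) throws HiveException, SerDeException {
    Output buffer = null;
    boolean[] nulls = new boolean[writers.length];
    for (VectorHashKeyWrapperBase kw : keys) {
        buffer = MapJoinKey.serializeVector(buffer, kw, writers, batch, nulls,
                sortableSortOrders, nullMarkers, notNullMarkers);
        // buffer.getData() / buffer.getLength() hold the bytes for this key
    }
    return buffer;
}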

Example 30 with Output

Use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

From class VectorizationContext, method getImplicitCastExpression.

/**
 * The GenericUDFs might need their children's output to be cast to the given castType.
 * This method returns a cast expression that achieves the required casting.
 */
private ExprNodeDesc getImplicitCastExpression(GenericUDF udf, ExprNodeDesc child, TypeInfo castType) throws HiveException {
    TypeInfo inputTypeInfo = child.getTypeInfo();
    String inputTypeString = inputTypeInfo.getTypeName();
    String castTypeString = castType.getTypeName();
    if (inputTypeString.equals(castTypeString)) {
        // Nothing to be done
        return null;
    }
    boolean inputTypeDecimal = false;
    boolean castTypeDecimal = false;
    if (decimalTypePattern.matcher(inputTypeString).matches()) {
        inputTypeDecimal = true;
    }
    if (decimalTypePattern.matcher(castTypeString).matches()) {
        castTypeDecimal = true;
    }
    if (castTypeDecimal && !inputTypeDecimal) {
        if (needsImplicitCastForDecimal(udf)) {
            // Cast the input to decimal
            // If castType is decimal, try not to lose precision for numeric types.
            castType = updatePrecision(inputTypeInfo, (DecimalTypeInfo) castType);
            GenericUDFToDecimal castToDecimalUDF = new GenericUDFToDecimal();
            castToDecimalUDF.setTypeInfo(castType);
            List<ExprNodeDesc> children = new ArrayList<>();
            children.add(child);
            return new ExprNodeGenericFuncDesc(castType, castToDecimalUDF, children);
        }
    } else if (!castTypeDecimal && inputTypeDecimal) {
        if (needsImplicitCastForDecimal(udf)) {
            // Cast decimal input to returnType
            GenericUDF genericUdf = getGenericUDFForCast(castType);
            List<ExprNodeDesc> children = new ArrayList<>();
            children.add(child);
            return new ExprNodeGenericFuncDesc(castType, genericUdf, children);
        }
    } else {
        // Casts to exact types including long to double etc. are needed in some special cases.
        if (udf instanceof GenericUDFCoalesce || udf instanceof GenericUDFElt || udf instanceof GenericUDFIf) {
            GenericUDF genericUdf = getGenericUDFForCast(castType);
            List<ExprNodeDesc> children = new ArrayList<>();
            children.add(child);
            return new ExprNodeGenericFuncDesc(castType, genericUdf, children);
        }
    }
    return null;
}
Also used : ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) CastDecimalToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToString) CastLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToString) CastFloatToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastFloatToString) CastDateToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToString) CastTimestampToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToString) CastDoubleToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToString) CastBooleanToStringViaLongToString(org.apache.hadoop.hive.ql.exec.vector.expressions.CastBooleanToStringViaLongToString) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) FilterStringColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColumnInList) FilterLongColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList) DecimalColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalColumnInList) DoubleColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.DoubleColumnInList) TimestampColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.TimestampColumnInList) Decimal64ColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.Decimal64ColumnInList) FilterDecimal64ColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDecimal64ColumnInList) LongColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.LongColumnInList) FilterDecimalColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDecimalColumnInList) StructColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.StructColumnInList) FilterStructColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStructColumnInList) FilterTimestampColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterTimestampColumnInList) StringColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.StringColumnInList) List(java.util.List) FilterDoubleColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
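The decimal branches above hinge on decimalTypePattern matching Hive type names such as decimal(10,2). The snippet below is only an illustration of that kind of match; the actual pattern is defined elsewhere in VectorizationContext and may differ.

// Illustration only: a pattern of this shape distinguishes decimal type
// names from other numeric type names.
java.util.regex.Pattern decimalLike =
        java.util.regex.Pattern.compile("decimal.*", java.util.regex.Pattern.CASE_INSENSITIVE);
boolean a = decimalLike.matcher("decimal(10,2)").matches(); // true
boolean b = decimalLike.matcher("double").matches();        // false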

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 77
ArrayList (java.util.ArrayList): 72
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 48
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 48
Test (org.junit.Test): 44
DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject): 43
Output (org.apache.hadoop.hive.serde2.ByteStream.Output): 42
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject): 41
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 41
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 37
Text (org.apache.hadoop.io.Text): 35
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 32
IOException (java.io.IOException): 29
DateWritableV2 (org.apache.hadoop.hive.serde2.io.DateWritableV2): 24
BytesWritable (org.apache.hadoop.io.BytesWritable): 23
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 22
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 22
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 21
TimestampWritableV2 (org.apache.hadoop.hive.serde2.io.TimestampWritableV2): 21
List (java.util.List): 18