Example 16 with CharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo in project hive by apache.

The class VectorUDFAdaptor, method setOutputCol:

private void setOutputCol(ColumnVector colVec, int i, Object value) {
    /* Depending on the output type, get the value, cast the result to the
     * correct type if needed, and assign the result into the output vector.
     */
    if (outputOI instanceof WritableStringObjectInspector) {
        BytesColumnVector bv = (BytesColumnVector) colVec;
        Text t;
        if (value instanceof String) {
            t = new Text((String) value);
        } else {
            t = ((WritableStringObjectInspector) outputOI).getPrimitiveWritableObject(value);
        }
        bv.setVal(i, t.getBytes(), 0, t.getLength());
    } else if (outputOI instanceof WritableHiveCharObjectInspector) {
        WritableHiveCharObjectInspector writableHiveCharObjectOI = (WritableHiveCharObjectInspector) outputOI;
        int maxLength = ((CharTypeInfo) writableHiveCharObjectOI.getTypeInfo()).getLength();
        BytesColumnVector bv = (BytesColumnVector) colVec;
        HiveCharWritable hiveCharWritable;
        if (value instanceof HiveCharWritable) {
            hiveCharWritable = ((HiveCharWritable) value);
        } else {
            hiveCharWritable = writableHiveCharObjectOI.getPrimitiveWritableObject(value);
        }
        Text t = hiveCharWritable.getTextValue();
        // In vector mode, CHAR values are stored unpadded.
        StringExpr.rightTrimAndTruncate(bv, i, t.getBytes(), 0, t.getLength(), maxLength);
    } else if (outputOI instanceof WritableHiveVarcharObjectInspector) {
        WritableHiveVarcharObjectInspector writableHiveVarcharObjectOI = (WritableHiveVarcharObjectInspector) outputOI;
        int maxLength = ((VarcharTypeInfo) writableHiveVarcharObjectOI.getTypeInfo()).getLength();
        BytesColumnVector bv = (BytesColumnVector) colVec;
        HiveVarcharWritable hiveVarcharWritable;
        if (value instanceof HiveVarcharWritable) {
            hiveVarcharWritable = ((HiveVarcharWritable) value);
        } else {
            hiveVarcharWritable = writableHiveVarcharObjectOI.getPrimitiveWritableObject(value);
        }
        Text t = hiveVarcharWritable.getTextValue();
        StringExpr.truncate(bv, i, t.getBytes(), 0, t.getLength(), maxLength);
    } else if (outputOI instanceof WritableIntObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        if (value instanceof Integer) {
            lv.vector[i] = (Integer) value;
        } else {
            lv.vector[i] = ((WritableIntObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableLongObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        if (value instanceof Long) {
            lv.vector[i] = (Long) value;
        } else {
            lv.vector[i] = ((WritableLongObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableDoubleObjectInspector) {
        DoubleColumnVector dv = (DoubleColumnVector) colVec;
        if (value instanceof Double) {
            dv.vector[i] = (Double) value;
        } else {
            dv.vector[i] = ((WritableDoubleObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableFloatObjectInspector) {
        DoubleColumnVector dv = (DoubleColumnVector) colVec;
        if (value instanceof Float) {
            dv.vector[i] = (Float) value;
        } else {
            dv.vector[i] = ((WritableFloatObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableShortObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        if (value instanceof Short) {
            lv.vector[i] = (Short) value;
        } else {
            lv.vector[i] = ((WritableShortObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableByteObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        if (value instanceof Byte) {
            lv.vector[i] = (Byte) value;
        } else {
            lv.vector[i] = ((WritableByteObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableTimestampObjectInspector) {
        TimestampColumnVector tv = (TimestampColumnVector) colVec;
        Timestamp ts;
        if (value instanceof Timestamp) {
            ts = (Timestamp) value;
        } else {
            ts = ((WritableTimestampObjectInspector) outputOI).getPrimitiveJavaObject(value);
        }
        tv.set(i, ts);
    } else if (outputOI instanceof WritableDateObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        Date d;
        if (value instanceof Date) {
            d = (Date) value;
        } else {
            d = ((WritableDateObjectInspector) outputOI).getPrimitiveJavaObject(value);
        }
        lv.vector[i] = DateWritable.dateToDays(d);
    } else if (outputOI instanceof WritableBooleanObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        if (value instanceof Boolean) {
            lv.vector[i] = (Boolean) value ? 1 : 0;
        } else {
            lv.vector[i] = ((WritableBooleanObjectInspector) outputOI).get(value) ? 1 : 0;
        }
    } else if (outputOI instanceof WritableHiveDecimalObjectInspector) {
        DecimalColumnVector dcv = (DecimalColumnVector) colVec;
        if (value instanceof HiveDecimal) {
            dcv.set(i, (HiveDecimal) value);
        } else {
            HiveDecimal hd = ((WritableHiveDecimalObjectInspector) outputOI).getPrimitiveJavaObject(value);
            dcv.set(i, hd);
        }
    } else if (outputOI instanceof WritableBinaryObjectInspector) {
        BytesWritable bw = (BytesWritable) value;
        BytesColumnVector bv = (BytesColumnVector) colVec;
        bv.setVal(i, bw.getBytes(), 0, bw.getLength());
    } else {
        throw new RuntimeException("Unhandled object type " + outputOI.getTypeName() + " inspector class " + outputOI.getClass().getName() + " value class " + value.getClass().getName());
    }
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) Timestamp(java.sql.Timestamp) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) Date(java.sql.Date) WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector)
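
A minimal standalone sketch of just the CHAR branch above, assuming the same Hive vectorization and serde2 classes the example imports; the class name, vector size, and char(10) type are illustrative choices, not part of the original:

import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.Text;

public class CharVectorSketch {
    public static void main(String[] args) {
        // char(10): the declared length drives the truncation below.
        CharTypeInfo charType = TypeInfoFactory.getCharTypeInfo(10);
        int maxLength = charType.getLength();
        BytesColumnVector bv = new BytesColumnVector(1024);
        bv.initBuffer();
        // The writable holds the value padded to the declared length.
        HiveCharWritable w = new HiveCharWritable(new HiveChar("abc", maxLength));
        Text t = w.getTextValue();
        // Same call as setOutputCol: strip the right padding and enforce
        // maxLength, so the vector stores the CHAR unpadded.
        StringExpr.rightTrimAndTruncate(bv, 0, t.getBytes(), 0, t.getLength(), maxLength);
    }
}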

Example 17 with CharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo in project hive by apache.

The class DDLSemanticAnalyzer, method getTypeName:

public static String getTypeName(ASTNode node) throws SemanticException {
    int token = node.getType();
    String typeName;
    // datetime type isn't currently supported
    if (token == HiveParser.TOK_DATETIME) {
        throw new SemanticException(ErrorMsg.UNSUPPORTED_TYPE.getMsg());
    }
    switch(token) {
        case HiveParser.TOK_CHAR:
            CharTypeInfo charTypeInfo = ParseUtils.getCharTypeInfo(node);
            typeName = charTypeInfo.getQualifiedName();
            break;
        case HiveParser.TOK_VARCHAR:
            VarcharTypeInfo varcharTypeInfo = ParseUtils.getVarcharTypeInfo(node);
            typeName = varcharTypeInfo.getQualifiedName();
            break;
        case HiveParser.TOK_DECIMAL:
            DecimalTypeInfo decTypeInfo = ParseUtils.getDecimalTypeTypeInfo(node);
            typeName = decTypeInfo.getQualifiedName();
            break;
        default:
            typeName = TokenToTypeName.get(token);
    }
    return typeName;
}
Also used : DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)
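
The qualified names returned above embed the type parameters. A small sketch, assuming the standard TypeInfoFactory helpers; the printed forms in the comments are the expected qualified names:

import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;

public class QualifiedNameSketch {
    public static void main(String[] args) {
        CharTypeInfo c = TypeInfoFactory.getCharTypeInfo(4);
        VarcharTypeInfo v = TypeInfoFactory.getVarcharTypeInfo(7);
        DecimalTypeInfo d = TypeInfoFactory.getDecimalTypeInfo(10, 2);
        System.out.println(c.getQualifiedName()); // char(4)
        System.out.println(v.getQualifiedName()); // varchar(7)
        System.out.println(d.getQualifiedName()); // decimal(10,2)
    }
}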

Example 18 with CharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo in project hive by apache.

The class TestHiveAccumuloTypes, method testBinaryTypes:

@Test
public void testBinaryTypes() throws Exception {
    final String tableName = test.getMethodName(), user = "root", pass = "";
    MockInstance mockInstance = new MockInstance(test.getMethodName());
    Connector conn = mockInstance.getConnector(user, new PasswordToken(pass));
    HiveAccumuloTableInputFormat inputformat = new HiveAccumuloTableInputFormat();
    JobConf conf = new JobConf();
    conf.set(AccumuloSerDeParameters.TABLE_NAME, tableName);
    conf.set(AccumuloSerDeParameters.USE_MOCK_INSTANCE, "true");
    conf.set(AccumuloSerDeParameters.INSTANCE_NAME, test.getMethodName());
    conf.set(AccumuloSerDeParameters.USER_NAME, user);
    conf.set(AccumuloSerDeParameters.USER_PASS, pass);
    // Not used by the mock instance, but required by the input format.
    conf.set(AccumuloSerDeParameters.ZOOKEEPERS, "localhost:2181");
    conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, AccumuloHiveConstants.ROWID + ",cf:string,cf:boolean,cf:tinyint,cf:smallint,cf:int,cf:bigint" + ",cf:float,cf:double,cf:decimal,cf:date,cf:timestamp,cf:char,cf:varchar");
    conf.set(serdeConstants.LIST_COLUMNS, "string,string,boolean,tinyint,smallint,int,bigint,float,double,decimal,date,timestamp,char(4),varchar(7)");
    conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,string,boolean,tinyint,smallint,int,bigint,float,double,decimal,date,timestamp,char(4),varchar(7)");
    conf.set(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, "binary");
    conn.tableOperations().create(tableName);
    BatchWriterConfig writerConf = new BatchWriterConfig();
    BatchWriter writer = conn.createBatchWriter(tableName, writerConf);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);
    String cf = "cf";
    byte[] cfBytes = cf.getBytes();
    Mutation m = new Mutation("row1");
    // string
    String stringValue = "string";
    JavaStringObjectInspector stringOI = (JavaStringObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME));
    LazyUtils.writePrimitiveUTF8(baos, stringOI.create(stringValue), stringOI, false, (byte) 0, null);
    m.put(cfBytes, "string".getBytes(), baos.toByteArray());
    // boolean
    boolean booleanValue = true;
    baos.reset();
    JavaBooleanObjectInspector booleanOI = (JavaBooleanObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME));
    LazyUtils.writePrimitive(baos, booleanOI.create(booleanValue), booleanOI);
    m.put(cfBytes, "boolean".getBytes(), baos.toByteArray());
    // tinyint
    byte tinyintValue = -127;
    baos.reset();
    JavaByteObjectInspector byteOI = (JavaByteObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME));
    LazyUtils.writePrimitive(baos, tinyintValue, byteOI);
    m.put(cfBytes, "tinyint".getBytes(), baos.toByteArray());
    // smallint
    short smallintValue = Short.MAX_VALUE;
    baos.reset();
    JavaShortObjectInspector shortOI = (JavaShortObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME));
    LazyUtils.writePrimitive(baos, smallintValue, shortOI);
    m.put(cfBytes, "smallint".getBytes(), baos.toByteArray());
    // int
    int intValue = Integer.MAX_VALUE;
    baos.reset();
    JavaIntObjectInspector intOI = (JavaIntObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME));
    LazyUtils.writePrimitive(baos, intValue, intOI);
    m.put(cfBytes, "int".getBytes(), baos.toByteArray());
    // bigint
    long bigintValue = Long.MAX_VALUE;
    baos.reset();
    JavaLongObjectInspector longOI = (JavaLongObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME));
    LazyUtils.writePrimitive(baos, bigintValue, longOI);
    m.put(cfBytes, "bigint".getBytes(), baos.toByteArray());
    // float
    float floatValue = Float.MAX_VALUE;
    baos.reset();
    JavaFloatObjectInspector floatOI = (JavaFloatObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME));
    LazyUtils.writePrimitive(baos, floatValue, floatOI);
    m.put(cfBytes, "float".getBytes(), baos.toByteArray());
    // double
    double doubleValue = Double.MAX_VALUE;
    baos.reset();
    JavaDoubleObjectInspector doubleOI = (JavaDoubleObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME));
    LazyUtils.writePrimitive(baos, doubleValue, doubleOI);
    m.put(cfBytes, "double".getBytes(), baos.toByteArray());
    // decimal
    baos.reset();
    HiveDecimal decimalValue = HiveDecimal.create(65536L);
    HiveDecimalWritable decimalWritable = new HiveDecimalWritable(decimalValue);
    decimalWritable.write(out);
    m.put(cfBytes, "decimal".getBytes(), baos.toByteArray());
    // date
    baos.reset();
    Date now = new Date(System.currentTimeMillis());
    DateWritable dateWritable = new DateWritable(now);
    Date dateValue = dateWritable.get();
    dateWritable.write(out);
    m.put(cfBytes, "date".getBytes(), baos.toByteArray());
    // timestamp
    baos.reset();
    Timestamp timestampValue = new Timestamp(now.getTime());
    ByteStream.Output output = new ByteStream.Output();
    TimestampWritable timestampWritable = new TimestampWritable(new Timestamp(now.getTime()));
    timestampWritable.write(new DataOutputStream(output));
    output.close();
    m.put(cfBytes, "timestamp".getBytes(), output.toByteArray());
    // char
    baos.reset();
    HiveChar charValue = new HiveChar("char", 4);
    JavaHiveCharObjectInspector charOI = (JavaHiveCharObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(new CharTypeInfo(4));
    LazyUtils.writePrimitiveUTF8(baos, charOI.create(charValue), charOI, false, (byte) 0, null);
    m.put(cfBytes, "char".getBytes(), baos.toByteArray());
    baos.reset();
    HiveVarchar varcharValue = new HiveVarchar("varchar", 7);
    JavaHiveVarcharObjectInspector varcharOI = (JavaHiveVarcharObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(new VarcharTypeInfo(7));
    LazyUtils.writePrimitiveUTF8(baos, varcharOI.create(varcharValue), varcharOI, false, (byte) 0, null);
    m.put(cfBytes, "varchar".getBytes(), baos.toByteArray());
    writer.addMutation(m);
    writer.close();
    for (Entry<Key, Value> e : conn.createScanner(tableName, new Authorizations())) {
        System.out.println(e);
    }
    // Create the RecordReader
    FileInputFormat.addInputPath(conf, new Path("unused"));
    InputSplit[] splits = inputformat.getSplits(conf, 0);
    assertEquals(1, splits.length);
    RecordReader<Text, AccumuloHiveRow> reader = inputformat.getRecordReader(splits[0], conf, null);
    Text key = reader.createKey();
    AccumuloHiveRow value = reader.createValue();
    reader.next(key, value);
    Assert.assertEquals(13, value.getTuples().size());
    ByteArrayRef byteRef = new ByteArrayRef();
    // string
    Text cfText = new Text(cf), cqHolder = new Text();
    cqHolder.set("string");
    byte[] valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyStringObjectInspector lazyStringOI = LazyPrimitiveObjectInspectorFactory.getLazyStringObjectInspector(false, (byte) 0);
    LazyString lazyString = (LazyString) LazyFactory.createLazyObject(lazyStringOI);
    lazyString.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(stringValue, lazyString.getWritableObject().toString());
    // boolean
    cqHolder.set("boolean");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyBooleanObjectInspector lazyBooleanOI = (LazyBooleanObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME));
    LazyBoolean lazyBoolean = (LazyBoolean) LazyFactory.createLazyPrimitiveBinaryClass(lazyBooleanOI);
    lazyBoolean.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(booleanValue, lazyBoolean.getWritableObject().get());
    // tinyint
    cqHolder.set("tinyint");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyByteObjectInspector lazyByteOI = (LazyByteObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME));
    LazyByte lazyByte = (LazyByte) LazyFactory.createLazyPrimitiveBinaryClass(lazyByteOI);
    lazyByte.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(tinyintValue, lazyByte.getWritableObject().get());
    // smallint
    cqHolder.set("smallint");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyShortObjectInspector lazyShortOI = (LazyShortObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME));
    LazyShort lazyShort = (LazyShort) LazyFactory.createLazyPrimitiveBinaryClass(lazyShortOI);
    lazyShort.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(smallintValue, lazyShort.getWritableObject().get());
    // int
    cqHolder.set("int");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyIntObjectInspector lazyIntOI = (LazyIntObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME));
    LazyInteger lazyInt = (LazyInteger) LazyFactory.createLazyPrimitiveBinaryClass(lazyIntOI);
    lazyInt.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(intValue, lazyInt.getWritableObject().get());
    // bigint
    cqHolder.set("bigint");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyLongObjectInspector lazyLongOI = (LazyLongObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME));
    LazyLong lazyLong = (LazyLong) LazyFactory.createLazyPrimitiveBinaryClass(lazyLongOI);
    lazyLong.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(bigintValue, lazyLong.getWritableObject().get());
    // float
    cqHolder.set("float");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyFloatObjectInspector lazyFloatOI = (LazyFloatObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME));
    LazyFloat lazyFloat = (LazyFloat) LazyFactory.createLazyPrimitiveBinaryClass(lazyFloatOI);
    lazyFloat.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(floatValue, lazyFloat.getWritableObject().get(), 0);
    // double
    cqHolder.set("double");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyDoubleObjectInspector lazyDoubleOI = (LazyDoubleObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME));
    LazyDouble lazyDouble = (LazyDouble) LazyFactory.createLazyPrimitiveBinaryClass(lazyDoubleOI);
    lazyDouble.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(doubleValue, lazyDouble.getWritableObject().get(), 0);
    // decimal
    cqHolder.set("decimal");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    ByteArrayInputStream bais = new ByteArrayInputStream(valueBytes);
    DataInputStream in = new DataInputStream(bais);
    decimalWritable.readFields(in);
    Assert.assertEquals(decimalValue, decimalWritable.getHiveDecimal());
    // date
    cqHolder.set("date");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    bais = new ByteArrayInputStream(valueBytes);
    in = new DataInputStream(bais);
    dateWritable.readFields(in);
    Assert.assertEquals(dateValue, dateWritable.get());
    // timestamp
    cqHolder.set("timestamp");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    bais = new ByteArrayInputStream(valueBytes);
    in = new DataInputStream(bais);
    timestampWritable.readFields(in);
    Assert.assertEquals(timestampValue, timestampWritable.getTimestamp());
    // char
    cqHolder.set("char");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyHiveCharObjectInspector lazyCharOI = (LazyHiveCharObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(new CharTypeInfo(4));
    LazyHiveChar lazyChar = (LazyHiveChar) LazyFactory.createLazyObject(lazyCharOI);
    lazyChar.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(charValue, lazyChar.getWritableObject().getHiveChar());
    // varchar
    cqHolder.set("varchar");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyHiveVarcharObjectInspector lazyVarcharOI = (LazyHiveVarcharObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(new VarcharTypeInfo(7));
    LazyHiveVarchar lazyVarchar = (LazyHiveVarchar) LazyFactory.createLazyObject(lazyVarcharOI);
    lazyVarchar.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(varcharValue.toString(), lazyVarchar.getWritableObject().getHiveVarchar().toString());
}
Also used : LazyHiveVarchar(org.apache.hadoop.hive.serde2.lazy.LazyHiveVarchar) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) LazyIntObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector) LazyDoubleObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDoubleObjectInspector) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) AccumuloHiveRow(org.apache.hadoop.hive.accumulo.AccumuloHiveRow) LazyHiveChar(org.apache.hadoop.hive.serde2.lazy.LazyHiveChar) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) ByteStream(org.apache.hadoop.hive.serde2.ByteStream) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) JobConf(org.apache.hadoop.mapred.JobConf) JavaHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveCharObjectInspector) LazyShortObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyShortObjectInspector) Authorizations(org.apache.accumulo.core.security.Authorizations) LazyLong(org.apache.hadoop.hive.serde2.lazy.LazyLong) LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) JavaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) JavaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaLongObjectInspector) LazyHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveVarcharObjectInspector) LazyBoolean(org.apache.hadoop.hive.serde2.lazy.LazyBoolean) LazyLongObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyLongObjectInspector) LazyByte(org.apache.hadoop.hive.serde2.lazy.LazyByte) JavaHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveVarcharObjectInspector) JavaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaFloatObjectInspector) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) ByteArrayInputStream(java.io.ByteArrayInputStream) Value(org.apache.accumulo.core.data.Value) LazyInteger(org.apache.hadoop.hive.serde2.lazy.LazyInteger) Mutation(org.apache.accumulo.core.data.Mutation) LazyDouble(org.apache.hadoop.hive.serde2.lazy.LazyDouble) Key(org.apache.accumulo.core.data.Key) Connector(org.apache.accumulo.core.client.Connector) LazyHiveCharObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveCharObjectInspector) JavaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaIntObjectInspector) DataOutputStream(java.io.DataOutputStream) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) LazyHiveChar(org.apache.hadoop.hive.serde2.lazy.LazyHiveChar) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) LazyBooleanObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBooleanObjectInspector) LazyFloat(org.apache.hadoop.hive.serde2.lazy.LazyFloat) Timestamp(java.sql.Timestamp) LazyTimestamp(org.apache.hadoop.hive.serde2.lazy.LazyTimestamp) JavaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaDoubleObjectInspector) LazyShort(org.apache.hadoop.hive.serde2.lazy.LazyShort) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) 
MockInstance(org.apache.accumulo.core.client.mock.MockInstance) JavaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaByteObjectInspector) LazyHiveDecimal(org.apache.hadoop.hive.serde2.lazy.LazyHiveDecimal) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) JavaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBooleanObjectInspector) InputSplit(org.apache.hadoop.mapred.InputSplit) Path(org.apache.hadoop.fs.Path) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) Text(org.apache.hadoop.io.Text) ByteArrayOutputStream(java.io.ByteArrayOutputStream) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) LazyHiveVarchar(org.apache.hadoop.hive.serde2.lazy.LazyHiveVarchar) LazyFloatObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyFloatObjectInspector) DataInputStream(java.io.DataInputStream) LazyDate(org.apache.hadoop.hive.serde2.lazy.LazyDate) Date(java.sql.Date) LazyByteObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyByteObjectInspector) BatchWriter(org.apache.accumulo.core.client.BatchWriter) JavaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaShortObjectInspector) Test(org.junit.Test)
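
A compact sketch distilling just the char(4) round trip from the test above, using the same serde2 lazy classes; the class name and standalone main are illustrative only:

import java.io.ByteArrayOutputStream;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.lazy.LazyHiveChar;
import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveCharObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveCharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;

public class CharRoundTripSketch {
    public static void main(String[] args) throws Exception {
        CharTypeInfo charType = new CharTypeInfo(4);
        // Write: serialize a char(4) value, as the test does for the Accumulo cell.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        JavaHiveCharObjectInspector charOI = (JavaHiveCharObjectInspector)
            PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(charType);
        LazyUtils.writePrimitiveUTF8(baos, charOI.create(new HiveChar("char", 4)), charOI, false, (byte) 0, null);
        byte[] bytes = baos.toByteArray();
        // Read: initialize a LazyHiveChar over the raw bytes and recover the value.
        ByteArrayRef ref = new ByteArrayRef();
        ref.setData(bytes);
        LazyHiveCharObjectInspector lazyOI = (LazyHiveCharObjectInspector)
            LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(charType);
        LazyHiveChar lazyChar = (LazyHiveChar) LazyFactory.createLazyObject(lazyOI);
        lazyChar.init(ref, 0, bytes.length);
        System.out.println(lazyChar.getWritableObject().getHiveChar());
    }
}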

Example 19 with CharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo in project hive by apache.

The class HCatBaseStorer, method getJavaObj:

/**
   * Converts a Pig value object to a Hive value object.
   * This method assumes that {@link #validateSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema, org.apache.hive.hcatalog.data.schema.HCatFieldSchema, org.apache.pig.impl.logicalLayer.schema.Schema, org.apache.hive.hcatalog.data.schema.HCatSchema, int)},
   * which checks that the types in the Pig schema are compatible with the target Hive table, has already been called.
   */
private Object getJavaObj(Object pigObj, HCatFieldSchema hcatFS) throws HCatException, BackendException {
    try {
        if (pigObj == null)
            return null;
        // The real workhorse. Spend time and energy in this method if there is a
        // need to keep HCatStorer lean and fast.
        Type type = hcatFS.getType();
        switch(type) {
            case BINARY:
                return ((DataByteArray) pigObj).get();
            case STRUCT:
                HCatSchema structSubSchema = hcatFS.getStructSubSchema();
                // Unwrap the tuple.
                List<Object> all = ((Tuple) pigObj).getAll();
                ArrayList<Object> converted = new ArrayList<Object>(all.size());
                for (int i = 0; i < all.size(); i++) {
                    converted.add(getJavaObj(all.get(i), structSubSchema.get(i)));
                }
                return converted;
            case ARRAY:
                // Unwrap the bag.
                DataBag pigBag = (DataBag) pigObj;
                HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0);
                boolean needTuple = tupFS.getType() == Type.STRUCT;
                List<Object> bagContents = new ArrayList<Object>((int) pigBag.size());
                Iterator<Tuple> bagItr = pigBag.iterator();
                while (bagItr.hasNext()) {
                    // If the tuple contained in the bag has only one element, discard the tuple wrapper.
                    bagContents.add(getJavaObj(needTuple ? bagItr.next() : bagItr.next().get(0), tupFS));
                }
                return bagContents;
            case MAP:
                Map<?, ?> pigMap = (Map<?, ?>) pigObj;
                Map<Object, Object> typeMap = new HashMap<Object, Object>();
                for (Entry<?, ?> entry : pigMap.entrySet()) {
                    // The value has a Schema, not a FieldSchema; schema validation
                    // enforces that the key is a String.
                    typeMap.put((String) entry.getKey(),
                        getJavaObj(entry.getValue(), hcatFS.getMapValueSchema().get(0)));
                }
                return typeMap;
            case STRING:
            case INT:
            case BIGINT:
            case FLOAT:
            case DOUBLE:
                return pigObj;
            case SMALLINT:
                if ((Integer) pigObj < Short.MIN_VALUE || (Integer) pigObj > Short.MAX_VALUE) {
                    handleOutOfRangeValue(pigObj, hcatFS);
                    return null;
                }
                return ((Integer) pigObj).shortValue();
            case TINYINT:
                if ((Integer) pigObj < Byte.MIN_VALUE || (Integer) pigObj > Byte.MAX_VALUE) {
                    handleOutOfRangeValue(pigObj, hcatFS);
                    return null;
                }
                return ((Integer) pigObj).byteValue();
            case BOOLEAN:
                if (pigObj instanceof String) {
                    if (((String) pigObj).trim().compareTo("0") == 0) {
                        return Boolean.FALSE;
                    }
                    if (((String) pigObj).trim().compareTo("1") == 0) {
                        return Boolean.TRUE;
                    }
                    throw new BackendException("Unexpected type " + type + " for value " + pigObj + " of class " + pigObj.getClass().getName(), PigHCatUtil.PIG_EXCEPTION_CODE);
                }
                return Boolean.parseBoolean(pigObj.toString());
            case DECIMAL:
                BigDecimal bd = (BigDecimal) pigObj;
                DecimalTypeInfo dti = (DecimalTypeInfo) hcatFS.getTypeInfo();
                if (bd.precision() > dti.precision() || bd.scale() > dti.scale()) {
                    handleOutOfRangeValue(pigObj, hcatFS);
                    return null;
                }
                return HiveDecimal.create(bd);
            case CHAR:
                String charVal = (String) pigObj;
                CharTypeInfo cti = (CharTypeInfo) hcatFS.getTypeInfo();
                if (charVal.length() > cti.getLength()) {
                    handleOutOfRangeValue(pigObj, hcatFS);
                    return null;
                }
                return new HiveChar(charVal, cti.getLength());
            case VARCHAR:
                String varcharVal = (String) pigObj;
                VarcharTypeInfo vti = (VarcharTypeInfo) hcatFS.getTypeInfo();
                if (varcharVal.length() > vti.getLength()) {
                    handleOutOfRangeValue(pigObj, hcatFS);
                    return null;
                }
                return new HiveVarchar(varcharVal, vti.getLength());
            case TIMESTAMP:
                DateTime dt = (DateTime) pigObj;
                // getMillis() returns UTC time regardless of TZ
                return new Timestamp(dt.getMillis());
            case DATE:
                /* We ignore any TZ setting on the Pig value since java.sql.Date doesn't have one
                 * (in any meaningful way). So if the Pig value has a 0 time component (midnight),
                 * we assume it reasonably 'fits' into a Hive DATE; if the time part is not 0, it
                 * is considered out of range for the target type.
                 */
                DateTime dateTime = ((DateTime) pigObj);
                if (dateTime.getMillisOfDay() != 0) {
                    handleOutOfRangeValue(pigObj, hcatFS, "Time component must be 0 (midnight) in local timezone; Local TZ val='" + pigObj + "'");
                    return null;
                }
                /* java.sql.Date is a poorly defined API. Some (all?) SerDes call toString() on it
                 * (e.g. LazySimpleSerDe, via LazyUtils.writePrimitiveUTF8()), which automatically
                 * adjusts for the local timezone. Date.valueOf() also uses the local timezone, as
                 * does Date(int, int, int). See PigHCatUtil#extractPigObject() for the
                 * corresponding read op. This way a DATETIME from Pig, when stored into Hive and
                 * read back, comes back with the same value. */
                return new Date(dateTime.getYear() - 1900, dateTime.getMonthOfYear() - 1, dateTime.getDayOfMonth());
            default:
                throw new BackendException("Unexpected HCat type " + type + " for value " + pigObj + " of class " + pigObj.getClass().getName(), PigHCatUtil.PIG_EXCEPTION_CODE);
        }
    } catch (BackendException e) {
        // provide the path to the field in the error message
        throw new BackendException((hcatFS.getName() == null ? " " : hcatFS.getName() + ".") + e.getMessage(), e);
    }
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) Timestamp(java.sql.Timestamp) DateTime(org.joda.time.DateTime) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) DataByteArray(org.apache.pig.data.DataByteArray) DataBag(org.apache.pig.data.DataBag) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) BigDecimal(java.math.BigDecimal) Date(java.sql.Date) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) BackendException(org.apache.pig.backend.BackendException) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) DataType(org.apache.pig.data.DataType) Type(org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type) Map(java.util.Map) HashMap(java.util.HashMap) Tuple(org.apache.pig.data.Tuple)
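
Note that the CHAR and VARCHAR cases above reject over-length values rather than silently truncating them. A minimal sketch of that policy in isolation; toHiveChar is a hypothetical helper, not part of HCatBaseStorer, and returning null stands in for handleOutOfRangeValue():

import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PigToHiveCharSketch {
    // Mirrors the CHAR case above: reject values longer than the declared length.
    static HiveChar toHiveChar(String pigValue, CharTypeInfo cti) {
        if (pigValue.length() > cti.getLength()) {
            return null; // out of range for the declared char(n) length
        }
        return new HiveChar(pigValue, cti.getLength());
    }

    public static void main(String[] args) {
        CharTypeInfo charType = TypeInfoFactory.getCharTypeInfo(4);
        System.out.println(toHiveChar("abc", charType));     // fits char(4)
        System.out.println(toHiveChar("toolong", charType)); // null: exceeds char(4)
    }
}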

Example 20 with CharTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo in project presto by prestodb.

The class OrcTester, method preprocessWriteValueOld:

private static Object preprocessWriteValueOld(TypeInfo typeInfo, Object value) {
    if (value == null) {
        return null;
    }
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            PrimitiveObjectInspector.PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
            switch(primitiveCategory) {
                case BOOLEAN:
                    return value;
                case BYTE:
                    return ((Number) value).byteValue();
                case SHORT:
                    return ((Number) value).shortValue();
                case INT:
                    return ((Number) value).intValue();
                case LONG:
                    return ((Number) value).longValue();
                case FLOAT:
                    return ((Number) value).floatValue();
                case DOUBLE:
                    return ((Number) value).doubleValue();
                case DECIMAL:
                    return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());
                case STRING:
                    return value;
                case CHAR:
                    return new HiveChar(value.toString(), ((CharTypeInfo) typeInfo).getLength());
                case DATE:
                    int days = ((SqlDate) value).getDays();
                    LocalDate localDate = LocalDate.ofEpochDay(days);
                    ZonedDateTime zonedDateTime = localDate.atStartOfDay(ZoneId.systemDefault());
                    long millis = zonedDateTime.toEpochSecond() * 1000;
                    Date date = new Date(0);
                    // millis must be set separately to avoid masking
                    date.setTime(millis);
                    return date;
                case TIMESTAMP:
                    long millisUtc = ((SqlTimestamp) value).getMillisUtc();
                    return new Timestamp(millisUtc);
                case BINARY:
                    return ((SqlVarbinary) value).getBytes();
            }
            break;
        case MAP:
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
            TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
            Map<Object, Object> newMap = new HashMap<>();
            for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
                newMap.put(preprocessWriteValueOld(keyTypeInfo, entry.getKey()), preprocessWriteValueOld(valueTypeInfo, entry.getValue()));
            }
            return newMap;
        case LIST:
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
            List<Object> newList = new ArrayList<>(((Collection<?>) value).size());
            for (Object element : (Iterable<?>) value) {
                newList.add(preprocessWriteValueOld(elementTypeInfo, element));
            }
            return newList;
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            List<?> fieldValues = (List<?>) value;
            List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
            List<Object> newStruct = new ArrayList<>();
            for (int fieldId = 0; fieldId < fieldValues.size(); fieldId++) {
                newStruct.add(preprocessWriteValueOld(fieldTypeInfos.get(fieldId), fieldValues.get(fieldId)));
            }
            return newStruct;
    }
    throw new PrestoException(NOT_SUPPORTED, format("Unsupported Hive type: %s", typeInfo));
}
Also used : HashMap(java.util.HashMap) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) SqlVarbinary(com.facebook.presto.spi.type.SqlVarbinary) ArrayList(java.util.ArrayList) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrestoException(com.facebook.presto.spi.PrestoException) LocalDate(java.time.LocalDate) SqlTimestamp(com.facebook.presto.spi.type.SqlTimestamp) Timestamp(java.sql.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ZonedDateTime(java.time.ZonedDateTime) Arrays.asList(java.util.Arrays.asList) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) SqlDate(com.facebook.presto.spi.type.SqlDate) LocalDate(java.time.LocalDate) Date(java.sql.Date) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) SqlDate(com.facebook.presto.spi.type.SqlDate) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)
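
Unlike the HCatalog storer in the previous example, this path relies on HiveChar itself to enforce the length: values longer than the declared char(n) are truncated by the HiveChar constructor. A small sketch of just the CHAR case; the char(4) type and input string are arbitrary illustrations:

import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class CharPreprocessSketch {
    public static void main(String[] args) {
        CharTypeInfo typeInfo = TypeInfoFactory.getCharTypeInfo(4);
        Object value = "character";
        // Same conversion as the CHAR case above; HiveChar truncates to length 4.
        HiveChar hc = new HiveChar(value.toString(), typeInfo.getLength());
        System.out.println(hc); // expected: char
    }
}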

Aggregations

CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)24 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)19 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)17 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)13 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)13 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)12 Timestamp (java.sql.Timestamp)11 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)9 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)9 Text (org.apache.hadoop.io.Text)9 Test (org.junit.Test)9 Date (java.sql.Date)8 HiveCharWritable (org.apache.hadoop.hive.serde2.io.HiveCharWritable)8 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)7 ArrayList (java.util.ArrayList)5 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)5 BytesWritable (org.apache.hadoop.io.BytesWritable)5 DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject)4 DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject)4 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)4