Search in sources :

Example 11 with HiveVarchar

use of org.apache.hadoop.hive.common.type.HiveVarchar in project hive by apache.

the class TestHiveAccumuloTypes method testBinaryTypes.

@Test
public void testBinaryTypes() throws Exception {
    final String tableName = test.getMethodName(), user = "root", pass = "";
    MockInstance mockInstance = new MockInstance(test.getMethodName());
    Connector conn = mockInstance.getConnector(user, new PasswordToken(pass));
    HiveAccumuloTableInputFormat inputformat = new HiveAccumuloTableInputFormat();
    JobConf conf = new JobConf();
    conf.set(AccumuloSerDeParameters.TABLE_NAME, tableName);
    conf.set(AccumuloSerDeParameters.USE_MOCK_INSTANCE, "true");
    conf.set(AccumuloSerDeParameters.INSTANCE_NAME, test.getMethodName());
    conf.set(AccumuloSerDeParameters.USER_NAME, user);
    conf.set(AccumuloSerDeParameters.USER_PASS, pass);
    // not used for mock, but
    conf.set(AccumuloSerDeParameters.ZOOKEEPERS, "localhost:2181");
    // required by input format.
    conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, AccumuloHiveConstants.ROWID + ",cf:string,cf:boolean,cf:tinyint,cf:smallint,cf:int,cf:bigint" + ",cf:float,cf:double,cf:decimal,cf:date,cf:timestamp,cf:char,cf:varchar");
    conf.set(serdeConstants.LIST_COLUMNS, "string,string,boolean,tinyint,smallint,int,bigint,float,double,decimal,date,timestamp,char(4),varchar(7)");
    conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,string,boolean,tinyint,smallint,int,bigint,float,double,decimal,date,timestamp,char(4),varchar(7)");
    conf.set(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, "binary");
    conn.tableOperations().create(tableName);
    BatchWriterConfig writerConf = new BatchWriterConfig();
    BatchWriter writer = conn.createBatchWriter(tableName, writerConf);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);
    String cf = "cf";
    byte[] cfBytes = cf.getBytes();
    Mutation m = new Mutation("row1");
    // string
    String stringValue = "string";
    JavaStringObjectInspector stringOI = (JavaStringObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME));
    LazyUtils.writePrimitiveUTF8(baos, stringOI.create(stringValue), stringOI, false, (byte) 0, null);
    m.put(cfBytes, "string".getBytes(), baos.toByteArray());
    // boolean
    boolean booleanValue = true;
    baos.reset();
    JavaBooleanObjectInspector booleanOI = (JavaBooleanObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME));
    LazyUtils.writePrimitive(baos, booleanOI.create(booleanValue), booleanOI);
    m.put(cfBytes, "boolean".getBytes(), baos.toByteArray());
    // tinyint
    byte tinyintValue = -127;
    baos.reset();
    JavaByteObjectInspector byteOI = (JavaByteObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME));
    LazyUtils.writePrimitive(baos, tinyintValue, byteOI);
    m.put(cfBytes, "tinyint".getBytes(), baos.toByteArray());
    // smallint
    short smallintValue = Short.MAX_VALUE;
    baos.reset();
    JavaShortObjectInspector shortOI = (JavaShortObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME));
    LazyUtils.writePrimitive(baos, smallintValue, shortOI);
    m.put(cfBytes, "smallint".getBytes(), baos.toByteArray());
    // int
    int intValue = Integer.MAX_VALUE;
    baos.reset();
    JavaIntObjectInspector intOI = (JavaIntObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME));
    LazyUtils.writePrimitive(baos, intValue, intOI);
    m.put(cfBytes, "int".getBytes(), baos.toByteArray());
    // bigint
    long bigintValue = Long.MAX_VALUE;
    baos.reset();
    JavaLongObjectInspector longOI = (JavaLongObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME));
    LazyUtils.writePrimitive(baos, bigintValue, longOI);
    m.put(cfBytes, "bigint".getBytes(), baos.toByteArray());
    // float
    float floatValue = Float.MAX_VALUE;
    baos.reset();
    JavaFloatObjectInspector floatOI = (JavaFloatObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME));
    LazyUtils.writePrimitive(baos, floatValue, floatOI);
    m.put(cfBytes, "float".getBytes(), baos.toByteArray());
    // double
    double doubleValue = Double.MAX_VALUE;
    baos.reset();
    JavaDoubleObjectInspector doubleOI = (JavaDoubleObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME));
    LazyUtils.writePrimitive(baos, doubleValue, doubleOI);
    m.put(cfBytes, "double".getBytes(), baos.toByteArray());
    // decimal
    baos.reset();
    HiveDecimal decimalValue = HiveDecimal.create(65536l);
    HiveDecimalWritable decimalWritable = new HiveDecimalWritable(decimalValue);
    decimalWritable.write(out);
    m.put(cfBytes, "decimal".getBytes(), baos.toByteArray());
    // date
    baos.reset();
    Date now = Date.ofEpochMilli(System.currentTimeMillis());
    DateWritableV2 dateWritable = new DateWritableV2(now);
    Date dateValue = dateWritable.get();
    dateWritable.write(out);
    m.put(cfBytes, "date".getBytes(), baos.toByteArray());
    // tiemestamp
    baos.reset();
    Timestamp timestampValue = Timestamp.ofEpochMilli(System.currentTimeMillis());
    ByteStream.Output output = new ByteStream.Output();
    TimestampWritableV2 timestampWritable = new TimestampWritableV2(timestampValue);
    timestampWritable.write(new DataOutputStream(output));
    output.close();
    m.put(cfBytes, "timestamp".getBytes(), output.toByteArray());
    // char
    baos.reset();
    HiveChar charValue = new HiveChar("char", 4);
    JavaHiveCharObjectInspector charOI = (JavaHiveCharObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(new CharTypeInfo(4));
    LazyUtils.writePrimitiveUTF8(baos, charOI.create(charValue), charOI, false, (byte) 0, null);
    m.put(cfBytes, "char".getBytes(), baos.toByteArray());
    baos.reset();
    HiveVarchar varcharValue = new HiveVarchar("varchar", 7);
    JavaHiveVarcharObjectInspector varcharOI = (JavaHiveVarcharObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(new VarcharTypeInfo(7));
    LazyUtils.writePrimitiveUTF8(baos, varcharOI.create(varcharValue), varcharOI, false, (byte) 0, null);
    m.put(cfBytes, "varchar".getBytes(), baos.toByteArray());
    writer.addMutation(m);
    writer.close();
    for (Entry<Key, Value> e : conn.createScanner(tableName, new Authorizations())) {
        System.out.println(e);
    }
    // Create the RecordReader
    FileInputFormat.addInputPath(conf, new Path("unused"));
    InputSplit[] splits = inputformat.getSplits(conf, 0);
    assertEquals(splits.length, 1);
    RecordReader<Text, AccumuloHiveRow> reader = inputformat.getRecordReader(splits[0], conf, null);
    Text key = reader.createKey();
    AccumuloHiveRow value = reader.createValue();
    reader.next(key, value);
    Assert.assertEquals(13, value.getTuples().size());
    ByteArrayRef byteRef = new ByteArrayRef();
    // string
    Text cfText = new Text(cf), cqHolder = new Text();
    cqHolder.set("string");
    byte[] valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyStringObjectInspector lazyStringOI = LazyPrimitiveObjectInspectorFactory.getLazyStringObjectInspector(false, (byte) 0);
    LazyString lazyString = (LazyString) LazyFactory.createLazyObject(lazyStringOI);
    lazyString.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(stringValue, lazyString.getWritableObject().toString());
    // boolean
    cqHolder.set("boolean");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyBooleanObjectInspector lazyBooleanOI = (LazyBooleanObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME));
    LazyBoolean lazyBoolean = (LazyBoolean) LazyFactory.createLazyPrimitiveBinaryClass(lazyBooleanOI);
    lazyBoolean.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(booleanValue, lazyBoolean.getWritableObject().get());
    // tinyint
    cqHolder.set("tinyint");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyByteObjectInspector lazyByteOI = (LazyByteObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME));
    LazyByte lazyByte = (LazyByte) LazyFactory.createLazyPrimitiveBinaryClass(lazyByteOI);
    lazyByte.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(tinyintValue, lazyByte.getWritableObject().get());
    // smallint
    cqHolder.set("smallint");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyShortObjectInspector lazyShortOI = (LazyShortObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME));
    LazyShort lazyShort = (LazyShort) LazyFactory.createLazyPrimitiveBinaryClass(lazyShortOI);
    lazyShort.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(smallintValue, lazyShort.getWritableObject().get());
    // int
    cqHolder.set("int");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyIntObjectInspector lazyIntOI = (LazyIntObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME));
    LazyInteger lazyInt = (LazyInteger) LazyFactory.createLazyPrimitiveBinaryClass(lazyIntOI);
    lazyInt.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(intValue, lazyInt.getWritableObject().get());
    // bigint
    cqHolder.set("bigint");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyLongObjectInspector lazyLongOI = (LazyLongObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME));
    LazyLong lazyLong = (LazyLong) LazyFactory.createLazyPrimitiveBinaryClass(lazyLongOI);
    lazyLong.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(bigintValue, lazyLong.getWritableObject().get());
    // float
    cqHolder.set("float");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyFloatObjectInspector lazyFloatOI = (LazyFloatObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME));
    LazyFloat lazyFloat = (LazyFloat) LazyFactory.createLazyPrimitiveBinaryClass(lazyFloatOI);
    lazyFloat.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(floatValue, lazyFloat.getWritableObject().get(), 0);
    // double
    cqHolder.set("double");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyDoubleObjectInspector lazyDoubleOI = (LazyDoubleObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME));
    LazyDouble lazyDouble = (LazyDouble) LazyFactory.createLazyPrimitiveBinaryClass(lazyDoubleOI);
    lazyDouble.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(doubleValue, lazyDouble.getWritableObject().get(), 0);
    // decimal
    cqHolder.set("decimal");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    ByteArrayInputStream bais = new ByteArrayInputStream(valueBytes);
    DataInputStream in = new DataInputStream(bais);
    decimalWritable.readFields(in);
    Assert.assertEquals(decimalValue, decimalWritable.getHiveDecimal());
    // date
    cqHolder.set("date");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    bais = new ByteArrayInputStream(valueBytes);
    in = new DataInputStream(bais);
    dateWritable.readFields(in);
    Assert.assertEquals(dateValue, dateWritable.get());
    // timestamp
    cqHolder.set("timestamp");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    bais = new ByteArrayInputStream(valueBytes);
    in = new DataInputStream(bais);
    timestampWritable.readFields(in);
    Assert.assertEquals(timestampValue, timestampWritable.getTimestamp());
    // char
    cqHolder.set("char");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyHiveCharObjectInspector lazyCharOI = (LazyHiveCharObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(new CharTypeInfo(4));
    LazyHiveChar lazyChar = (LazyHiveChar) LazyFactory.createLazyObject(lazyCharOI);
    lazyChar.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(charValue, lazyChar.getWritableObject().getHiveChar());
    // varchar
    cqHolder.set("varchar");
    valueBytes = value.getValue(cfText, cqHolder);
    Assert.assertNotNull(valueBytes);
    byteRef.setData(valueBytes);
    LazyHiveVarcharObjectInspector lazyVarcharOI = (LazyHiveVarcharObjectInspector) LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(new VarcharTypeInfo(7));
    LazyHiveVarchar lazyVarchar = (LazyHiveVarchar) LazyFactory.createLazyObject(lazyVarcharOI);
    lazyVarchar.init(byteRef, 0, valueBytes.length);
    Assert.assertEquals(varcharValue.toString(), lazyVarchar.getWritableObject().getHiveVarchar().toString());
}
Also used : LazyHiveVarchar(org.apache.hadoop.hive.serde2.lazy.LazyHiveVarchar) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) LazyIntObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector) LazyDoubleObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDoubleObjectInspector) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) AccumuloHiveRow(org.apache.hadoop.hive.accumulo.AccumuloHiveRow) LazyHiveChar(org.apache.hadoop.hive.serde2.lazy.LazyHiveChar) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) ByteStream(org.apache.hadoop.hive.serde2.ByteStream) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) JobConf(org.apache.hadoop.mapred.JobConf) JavaHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveCharObjectInspector) LazyShortObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyShortObjectInspector) Authorizations(org.apache.accumulo.core.security.Authorizations) LazyLong(org.apache.hadoop.hive.serde2.lazy.LazyLong) LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) JavaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) JavaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaLongObjectInspector) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) LazyHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveVarcharObjectInspector) LazyBoolean(org.apache.hadoop.hive.serde2.lazy.LazyBoolean) LazyLongObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyLongObjectInspector) LazyByte(org.apache.hadoop.hive.serde2.lazy.LazyByte) JavaHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveVarcharObjectInspector) JavaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaFloatObjectInspector) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) ByteArrayInputStream(java.io.ByteArrayInputStream) Value(org.apache.accumulo.core.data.Value) LazyInteger(org.apache.hadoop.hive.serde2.lazy.LazyInteger) Mutation(org.apache.accumulo.core.data.Mutation) LazyDouble(org.apache.hadoop.hive.serde2.lazy.LazyDouble) Key(org.apache.accumulo.core.data.Key) Connector(org.apache.accumulo.core.client.Connector) LazyHiveCharObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveCharObjectInspector) JavaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaIntObjectInspector) DataOutputStream(java.io.DataOutputStream) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) LazyHiveChar(org.apache.hadoop.hive.serde2.lazy.LazyHiveChar) LazyBooleanObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBooleanObjectInspector) LazyFloat(org.apache.hadoop.hive.serde2.lazy.LazyFloat) LazyTimestamp(org.apache.hadoop.hive.serde2.lazy.LazyTimestamp) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) JavaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaDoubleObjectInspector) LazyShort(org.apache.hadoop.hive.serde2.lazy.LazyShort) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) JavaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaByteObjectInspector) LazyHiveDecimal(org.apache.hadoop.hive.serde2.lazy.LazyHiveDecimal) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) JavaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBooleanObjectInspector) InputSplit(org.apache.hadoop.mapred.InputSplit) Path(org.apache.hadoop.fs.Path) Text(org.apache.hadoop.io.Text) ByteArrayOutputStream(java.io.ByteArrayOutputStream) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) LazyHiveVarchar(org.apache.hadoop.hive.serde2.lazy.LazyHiveVarchar) LazyFloatObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyFloatObjectInspector) DataInputStream(java.io.DataInputStream) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) Date(org.apache.hadoop.hive.common.type.Date) LazyDate(org.apache.hadoop.hive.serde2.lazy.LazyDate) LazyByteObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyByteObjectInspector) BatchWriter(org.apache.accumulo.core.client.BatchWriter) JavaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaShortObjectInspector) Test(org.junit.Test)

Example 12 with HiveVarchar

use of org.apache.hadoop.hive.common.type.HiveVarchar in project hive by apache.

the class HCatBaseStorer method getJavaObj.

/**
 * Convert from Pig value object to Hive value object
 * This method assumes that {@link #validateSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema, org.apache.hive.hcatalog.data.schema.HCatFieldSchema, org.apache.pig.impl.logicalLayer.schema.Schema, org.apache.hive.hcatalog.data.schema.HCatSchema, int)}
 * which checks the types in Pig schema are compatible with target Hive table, has been called.
 */
private Object getJavaObj(Object pigObj, HCatFieldSchema hcatFS) throws HCatException, BackendException {
    try {
        if (pigObj == null)
            return null;
        // The real work-horse. Spend time and energy in this method if there is
        // need to keep HCatStorer lean and go fast.
        Type type = hcatFS.getType();
        switch(type) {
            case BINARY:
                return ((DataByteArray) pigObj).get();
            case STRUCT:
                HCatSchema structSubSchema = hcatFS.getStructSubSchema();
                // Unwrap the tuple.
                List<Object> all = ((Tuple) pigObj).getAll();
                ArrayList<Object> converted = new ArrayList<Object>(all.size());
                for (int i = 0; i < all.size(); i++) {
                    converted.add(getJavaObj(all.get(i), structSubSchema.get(i)));
                }
                return converted;
            case ARRAY:
                // Unwrap the bag.
                DataBag pigBag = (DataBag) pigObj;
                HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0);
                boolean needTuple = tupFS.getType() == Type.STRUCT;
                List<Object> bagContents = new ArrayList<Object>((int) pigBag.size());
                Iterator<Tuple> bagItr = pigBag.iterator();
                while (bagItr.hasNext()) {
                    // If there is only one element in tuple contained in bag, we throw away the tuple.
                    bagContents.add(getJavaObj(needTuple ? bagItr.next() : bagItr.next().get(0), tupFS));
                }
                return bagContents;
            case MAP:
                Map<?, ?> pigMap = (Map<?, ?>) pigObj;
                Map<Object, Object> typeMap = new LinkedHashMap<Object, Object>();
                for (Entry<?, ?> entry : pigMap.entrySet()) {
                    // the value has a schema and not a FieldSchema
                    typeMap.put(// Schema validation enforces that the Key is a String
                    (String) entry.getKey(), getJavaObj(entry.getValue(), hcatFS.getMapValueSchema().get(0)));
                }
                return typeMap;
            case STRING:
            case INT:
            case BIGINT:
            case FLOAT:
            case DOUBLE:
                return pigObj;
            case SMALLINT:
                if ((Integer) pigObj < Short.MIN_VALUE || (Integer) pigObj > Short.MAX_VALUE) {
                    handleOutOfRangeValue(pigObj, hcatFS);
                    return null;
                }
                return ((Integer) pigObj).shortValue();
            case TINYINT:
                if ((Integer) pigObj < Byte.MIN_VALUE || (Integer) pigObj > Byte.MAX_VALUE) {
                    handleOutOfRangeValue(pigObj, hcatFS);
                    return null;
                }
                return ((Integer) pigObj).byteValue();
            case BOOLEAN:
                if (pigObj instanceof String) {
                    if (((String) pigObj).trim().compareTo("0") == 0) {
                        return Boolean.FALSE;
                    }
                    if (((String) pigObj).trim().compareTo("1") == 0) {
                        return Boolean.TRUE;
                    }
                    throw new BackendException("Unexpected type " + type + " for value " + pigObj + " of class " + pigObj.getClass().getName(), PigHCatUtil.PIG_EXCEPTION_CODE);
                }
                return Boolean.parseBoolean(pigObj.toString());
            case DECIMAL:
                BigDecimal bd = (BigDecimal) pigObj;
                DecimalTypeInfo dti = (DecimalTypeInfo) hcatFS.getTypeInfo();
                if (bd.precision() > dti.precision() || bd.scale() > dti.scale()) {
                    handleOutOfRangeValue(pigObj, hcatFS);
                    return null;
                }
                return HiveDecimal.create(bd);
            case CHAR:
                String charVal = (String) pigObj;
                CharTypeInfo cti = (CharTypeInfo) hcatFS.getTypeInfo();
                if (charVal.length() > cti.getLength()) {
                    handleOutOfRangeValue(pigObj, hcatFS);
                    return null;
                }
                return new HiveChar(charVal, cti.getLength());
            case VARCHAR:
                String varcharVal = (String) pigObj;
                VarcharTypeInfo vti = (VarcharTypeInfo) hcatFS.getTypeInfo();
                if (varcharVal.length() > vti.getLength()) {
                    handleOutOfRangeValue(pigObj, hcatFS);
                    return null;
                }
                return new HiveVarchar(varcharVal, vti.getLength());
            case TIMESTAMP:
                DateTime dt = (DateTime) pigObj;
                // toEpochMilli() returns UTC time regardless of TZ
                return Timestamp.ofEpochMilli(dt.getMillis());
            case DATE:
                /**
                 * We ignore any TZ setting on Pig value since java.sql.Date doesn't have it (in any
                 * meaningful way).  So the assumption is that if Pig value has 0 time component (midnight)
                 * we assume it reasonably 'fits' into a Hive DATE.  If time part is not 0, it's considered
                 * out of range for target type.
                 */
                DateTime dateTime = ((DateTime) pigObj);
                if (dateTime.getMillisOfDay() != 0) {
                    handleOutOfRangeValue(pigObj, hcatFS, "Time component must be 0 (midnight) in local timezone; Local TZ val='" + pigObj + "'");
                    return null;
                }
                /*java.sql.Date is a poorly defined API.  Some (all?) SerDes call toString() on it
        [e.g. LazySimpleSerDe, uses LazyUtils.writePrimitiveUTF8()],  which automatically adjusts
          for local timezone.  Date.valueOf() also uses local timezone (as does Date(int,int,int).
          Also see PigHCatUtil#extractPigObject() for corresponding read op.  This way a DATETIME from Pig,
          when stored into Hive and read back comes back with the same value.*/
                return Date.of(dateTime.getYear(), dateTime.getMonthOfYear(), dateTime.getDayOfMonth());
            default:
                throw new BackendException("Unexpected HCat type " + type + " for value " + pigObj + " of class " + pigObj.getClass().getName(), PigHCatUtil.PIG_EXCEPTION_CODE);
        }
    } catch (BackendException e) {
        // provide the path to the field in the error message
        throw new BackendException((hcatFS.getName() == null ? " " : hcatFS.getName() + ".") + e.getMessage(), e);
    }
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) ArrayList(java.util.ArrayList) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) DateTime(org.joda.time.DateTime) LinkedHashMap(java.util.LinkedHashMap) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) DataByteArray(org.apache.pig.data.DataByteArray) DataBag(org.apache.pig.data.DataBag) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) BigDecimal(java.math.BigDecimal) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) BackendException(org.apache.pig.backend.BackendException) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) DataType(org.apache.pig.data.DataType) Type(org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Tuple(org.apache.pig.data.Tuple)

Example 13 with HiveVarchar

use of org.apache.hadoop.hive.common.type.HiveVarchar in project hive by apache.

the class TestDefaultHCatRecord method getHCat13TypesRecord.

private static HCatRecord getHCat13TypesRecord() {
    List<Object> rec_hcat13types = new ArrayList<Object>(5);
    // prec 5, scale 2
    rec_hcat13types.add(HiveDecimal.create(new BigDecimal("123.45")));
    rec_hcat13types.add(new HiveChar("hive_char", 10));
    rec_hcat13types.add(new HiveVarchar("hive_varchar", 20));
    rec_hcat13types.add(Date.valueOf("2014-01-06"));
    rec_hcat13types.add(Timestamp.ofEpochMilli(System.currentTimeMillis()));
    return new DefaultHCatRecord(rec_hcat13types);
}
Also used : ArrayList(java.util.ArrayList) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) BigDecimal(java.math.BigDecimal)

Example 14 with HiveVarchar

use of org.apache.hadoop.hive.common.type.HiveVarchar in project hive by apache.

the class ReaderWriter method writeDatum.

public static void writeDatum(DataOutput out, Object val) throws IOException {
    // write the data type
    byte type = DataType.findType(val);
    out.write(type);
    switch(type) {
        case DataType.LIST:
            List<?> list = (List<?>) val;
            int sz = list.size();
            out.writeInt(sz);
            for (int i = 0; i < sz; i++) {
                writeDatum(out, list.get(i));
            }
            return;
        case DataType.MAP:
            Map<?, ?> m = (Map<?, ?>) val;
            out.writeInt(m.size());
            Iterator<?> i = m.entrySet().iterator();
            while (i.hasNext()) {
                Entry<?, ?> entry = (Entry<?, ?>) i.next();
                writeDatum(out, entry.getKey());
                writeDatum(out, entry.getValue());
            }
            return;
        case DataType.INTEGER:
            new VIntWritable((Integer) val).write(out);
            return;
        case DataType.LONG:
            new VLongWritable((Long) val).write(out);
            return;
        case DataType.FLOAT:
            out.writeFloat((Float) val);
            return;
        case DataType.DOUBLE:
            out.writeDouble((Double) val);
            return;
        case DataType.BOOLEAN:
            out.writeBoolean((Boolean) val);
            return;
        case DataType.BYTE:
            out.writeByte((Byte) val);
            return;
        case DataType.SHORT:
            out.writeShort((Short) val);
            return;
        case DataType.STRING:
            String s = (String) val;
            byte[] utfBytes = s.getBytes(ReaderWriter.UTF8);
            out.writeInt(utfBytes.length);
            out.write(utfBytes);
            return;
        case DataType.BINARY:
            byte[] ba = (byte[]) val;
            out.writeInt(ba.length);
            out.write(ba);
            return;
        case DataType.NULL:
            // for NULL we just write out the type
            return;
        case DataType.CHAR:
            new HiveCharWritable((HiveChar) val).write(out);
            return;
        case DataType.VARCHAR:
            new HiveVarcharWritable((HiveVarchar) val).write(out);
            return;
        case DataType.DECIMAL:
            new HiveDecimalWritable((HiveDecimal) val).write(out);
            return;
        case DataType.DATE:
            new DateWritableV2((Date) val).write(out);
            return;
        case DataType.TIMESTAMP:
            new TimestampWritableV2((Timestamp) val).write(out);
            return;
        default:
            throw new IOException("Unexpected data type " + type + " found in stream.");
    }
}
Also used : VIntWritable(org.apache.hadoop.io.VIntWritable) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) Entry(java.util.Map.Entry) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) ArrayList(java.util.ArrayList) List(java.util.List) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) IOException(java.io.IOException) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) Date(org.apache.hadoop.hive.common.type.Date) VLongWritable(org.apache.hadoop.io.VLongWritable) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 15 with HiveVarchar

use of org.apache.hadoop.hive.common.type.HiveVarchar in project hive by apache.

the class VectorizedBatchUtil method setVector.

private static void setVector(Object row, StructObjectInspector oi, StructField field, VectorizedRowBatch batch, DataOutputBuffer buffer, int rowIndex, int colIndex, int offset) throws HiveException {
    Object fieldData = oi.getStructFieldData(row, field);
    ObjectInspector foi = field.getFieldObjectInspector();
    // Vectorization only supports PRIMITIVE data types. Assert the same
    assert (foi.getCategory() == Category.PRIMITIVE);
    // Get writable object
    PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
    Object writableCol = poi.getPrimitiveWritableObject(fieldData);
    // float/double. String types have no default value for null.
    switch(poi.getPrimitiveCategory()) {
        case BOOLEAN:
            {
                LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.vector[rowIndex] = 1;
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case BYTE:
            {
                LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.vector[rowIndex] = 1;
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case SHORT:
            {
                LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    lcv.vector[rowIndex] = ((ShortWritable) writableCol).get();
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.vector[rowIndex] = 1;
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case INT:
            {
                LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    lcv.vector[rowIndex] = ((IntWritable) writableCol).get();
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.vector[rowIndex] = 1;
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case LONG:
            {
                LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.vector[rowIndex] = 1;
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case DATE:
            {
                LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    lcv.vector[rowIndex] = ((DateWritableV2) writableCol).getDays();
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.vector[rowIndex] = 1;
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case FLOAT:
            {
                DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    dcv.vector[rowIndex] = ((FloatWritable) writableCol).get();
                    dcv.isNull[rowIndex] = false;
                } else {
                    dcv.vector[rowIndex] = Double.NaN;
                    setNullColIsNullValue(dcv, rowIndex);
                }
            }
            break;
        case DOUBLE:
            {
                DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get();
                    dcv.isNull[rowIndex] = false;
                } else {
                    dcv.vector[rowIndex] = Double.NaN;
                    setNullColIsNullValue(dcv, rowIndex);
                }
            }
            break;
        case TIMESTAMP:
            {
                TimestampColumnVector lcv = (TimestampColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    lcv.set(rowIndex, ((TimestampWritableV2) writableCol).getTimestamp().toSqlTimestamp());
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.setNullValue(rowIndex);
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case INTERVAL_YEAR_MONTH:
            {
                LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    HiveIntervalYearMonth i = ((HiveIntervalYearMonthWritable) writableCol).getHiveIntervalYearMonth();
                    lcv.vector[rowIndex] = i.getTotalMonths();
                    lcv.isNull[rowIndex] = false;
                } else {
                    lcv.vector[rowIndex] = 1;
                    setNullColIsNullValue(lcv, rowIndex);
                }
            }
            break;
        case INTERVAL_DAY_TIME:
            {
                IntervalDayTimeColumnVector icv = (IntervalDayTimeColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    HiveIntervalDayTime idt = ((HiveIntervalDayTimeWritable) writableCol).getHiveIntervalDayTime();
                    icv.set(rowIndex, idt);
                    icv.isNull[rowIndex] = false;
                } else {
                    icv.setNullValue(rowIndex);
                    setNullColIsNullValue(icv, rowIndex);
                }
            }
            break;
        case BINARY:
            {
                BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    bcv.isNull[rowIndex] = false;
                    BytesWritable bw = (BytesWritable) writableCol;
                    byte[] bytes = bw.getBytes();
                    int start = buffer.getLength();
                    int length = bw.getLength();
                    try {
                        buffer.write(bytes, 0, length);
                    } catch (IOException ioe) {
                        throw new IllegalStateException("bad write", ioe);
                    }
                    bcv.setRef(rowIndex, buffer.getData(), start, length);
                } else {
                    setNullColIsNullValue(bcv, rowIndex);
                }
            }
            break;
        case STRING:
            {
                BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    bcv.isNull[rowIndex] = false;
                    Text colText = (Text) writableCol;
                    int start = buffer.getLength();
                    int length = colText.getLength();
                    try {
                        buffer.write(colText.getBytes(), 0, length);
                    } catch (IOException ioe) {
                        throw new IllegalStateException("bad write", ioe);
                    }
                    bcv.setRef(rowIndex, buffer.getData(), start, length);
                } else {
                    setNullColIsNullValue(bcv, rowIndex);
                }
            }
            break;
        case CHAR:
            {
                BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    bcv.isNull[rowIndex] = false;
                    HiveChar colHiveChar = ((HiveCharWritable) writableCol).getHiveChar();
                    byte[] bytes = colHiveChar.getStrippedValue().getBytes();
                    // We assume the CHAR maximum length was enforced when the object was created.
                    int length = bytes.length;
                    int start = buffer.getLength();
                    try {
                        // In vector mode, we store CHAR as unpadded.
                        buffer.write(bytes, 0, length);
                    } catch (IOException ioe) {
                        throw new IllegalStateException("bad write", ioe);
                    }
                    bcv.setRef(rowIndex, buffer.getData(), start, length);
                } else {
                    setNullColIsNullValue(bcv, rowIndex);
                }
            }
            break;
        case VARCHAR:
            {
                BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
                if (writableCol != null) {
                    bcv.isNull[rowIndex] = false;
                    HiveVarchar colHiveVarchar = ((HiveVarcharWritable) writableCol).getHiveVarchar();
                    byte[] bytes = colHiveVarchar.getValue().getBytes();
                    // We assume the VARCHAR maximum length was enforced when the object was created.
                    int length = bytes.length;
                    int start = buffer.getLength();
                    try {
                        buffer.write(bytes, 0, length);
                    } catch (IOException ioe) {
                        throw new IllegalStateException("bad write", ioe);
                    }
                    bcv.setRef(rowIndex, buffer.getData(), start, length);
                } else {
                    setNullColIsNullValue(bcv, rowIndex);
                }
            }
            break;
        case DECIMAL:
            DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[offset + colIndex];
            if (writableCol != null) {
                dcv.isNull[rowIndex] = false;
                HiveDecimalWritable wobj = (HiveDecimalWritable) writableCol;
                dcv.set(rowIndex, wobj);
            } else {
                setNullColIsNullValue(dcv, rowIndex);
            }
            break;
        default:
            throw new HiveException("Vectorizaton is not supported for datatype:" + poi.getPrimitiveCategory());
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) FloatWritable(org.apache.hadoop.io.FloatWritable) HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)

Aggregations

HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)95 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)61 Test (org.junit.Test)35 Text (org.apache.hadoop.io.Text)31 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)28 HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable)27 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)26 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)23 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)21 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)21 ArrayList (java.util.ArrayList)20 Timestamp (org.apache.hadoop.hive.common.type.Timestamp)20 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)20 LongWritable (org.apache.hadoop.io.LongWritable)19 Date (org.apache.hadoop.hive.common.type.Date)18 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)18 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)17 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)17 BooleanWritable (org.apache.hadoop.io.BooleanWritable)17 FloatWritable (org.apache.hadoop.io.FloatWritable)17