Search in sources :

Example 6 with VarcharTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo in project hive by apache.

the class VectorRandomRowSource method randomObject.

public static Object randomObject(int column, Random r, PrimitiveCategory[] primitiveCategories, PrimitiveTypeInfo[] primitiveTypeInfos, String[] alphabets, boolean addEscapables, String needsEscapeStr) {
    PrimitiveCategory primitiveCategory = primitiveCategories[column];
    PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
    try {
        switch(primitiveCategory) {
            case BOOLEAN:
                return Boolean.valueOf(r.nextInt(1) == 1);
            case BYTE:
                return Byte.valueOf((byte) r.nextInt());
            case SHORT:
                return Short.valueOf((short) r.nextInt());
            case INT:
                return Integer.valueOf(r.nextInt());
            case LONG:
                return Long.valueOf(r.nextLong());
            case DATE:
                return RandomTypeUtil.getRandDate(r);
            case FLOAT:
                return Float.valueOf(r.nextFloat() * 10 - 5);
            case DOUBLE:
                return Double.valueOf(r.nextDouble() * 10 - 5);
            case STRING:
            case CHAR:
            case VARCHAR:
                {
                    String result;
                    if (alphabets != null && alphabets[column] != null) {
                        result = RandomTypeUtil.getRandString(r, alphabets[column], r.nextInt(10));
                    } else {
                        result = RandomTypeUtil.getRandString(r);
                    }
                    if (addEscapables && result.length() > 0) {
                        int escapeCount = 1 + r.nextInt(2);
                        for (int i = 0; i < escapeCount; i++) {
                            int index = r.nextInt(result.length());
                            String begin = result.substring(0, index);
                            String end = result.substring(index);
                            Character needsEscapeChar = needsEscapeStr.charAt(r.nextInt(needsEscapeStr.length()));
                            result = begin + needsEscapeChar + end;
                        }
                    }
                    switch(primitiveCategory) {
                        case STRING:
                            return result;
                        case CHAR:
                            return new HiveChar(result, ((CharTypeInfo) primitiveTypeInfo).getLength());
                        case VARCHAR:
                            return new HiveVarchar(result, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
                        default:
                            throw new Error("Unknown primitive category " + primitiveCategory);
                    }
                }
            case BINARY:
                return getRandBinary(r, 1 + r.nextInt(100));
            case TIMESTAMP:
                return RandomTypeUtil.getRandTimestamp(r);
            case INTERVAL_YEAR_MONTH:
                return getRandIntervalYearMonth(r);
            case INTERVAL_DAY_TIME:
                return getRandIntervalDayTime(r);
            case DECIMAL:
                return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
            default:
                throw new Error("Unknown primitive category " + primitiveCategory);
        }
    } catch (Exception e) {
        throw new RuntimeException("randomObject failed on column " + column + " type " + primitiveCategory, e);
    }
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)

Example 7 with VarcharTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo in project hive by apache.

the class TestGenericUDFOPPositive method testVarchar.

@Test
public void testVarchar() throws HiveException {
    GenericUDFOPPositive udf = new GenericUDFOPPositive();
    HiveVarchar vc = new HiveVarchar("32300.004747", 12);
    HiveVarcharWritable input = new HiveVarcharWritable(vc);
    VarcharTypeInfo inputTypeInfo = TypeInfoFactory.getVarcharTypeInfo(12);
    ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(inputTypeInfo) };
    DeferredObject[] args = { new DeferredJavaObject(input) };
    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    Assert.assertEquals(TypeInfoFactory.doubleTypeInfo, oi.getTypeInfo());
    DoubleWritable res = (DoubleWritable) udf.evaluate(args);
    Assert.assertEquals(new Double(32300.004747), new Double(res.get()));
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) Test(org.junit.Test)

Example 8 with VarcharTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo in project hive by apache.

the class TestObjectInspectorConverters method testGetConvertedOI.

public void testGetConvertedOI() throws Throwable {
    // Try with types that have type params
    PrimitiveTypeInfo varchar5TI = (PrimitiveTypeInfo) TypeInfoFactory.getPrimitiveTypeInfo("varchar(5)");
    PrimitiveTypeInfo varchar10TI = (PrimitiveTypeInfo) TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)");
    PrimitiveObjectInspector varchar5OI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(varchar5TI);
    PrimitiveObjectInspector varchar10OI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(varchar10TI);
    // output OI should have varchar type params
    PrimitiveObjectInspector poi = (PrimitiveObjectInspector) ObjectInspectorConverters.getConvertedOI(varchar10OI, varchar5OI);
    VarcharTypeInfo vcParams = (VarcharTypeInfo) poi.getTypeInfo();
    assertEquals("varchar length doesn't match", 5, vcParams.getLength());
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 9 with VarcharTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo in project hive by apache.

the class TestGenericUDFCeil method testVarchar.

@Test
public void testVarchar() throws HiveException {
    GenericUDFCeil udf = new GenericUDFCeil();
    HiveVarchar vc = new HiveVarchar("32300.004747", 12);
    HiveVarcharWritable input = new HiveVarcharWritable(vc);
    VarcharTypeInfo inputTypeInfo = TypeInfoFactory.getVarcharTypeInfo(12);
    ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(inputTypeInfo) };
    DeferredObject[] args = { new DeferredJavaObject(input) };
    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    Assert.assertEquals(TypeInfoFactory.longTypeInfo, oi.getTypeInfo());
    LongWritable res = (LongWritable) udf.evaluate(args);
    Assert.assertEquals(32301L, res.get());
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) LongWritable(org.apache.hadoop.io.LongWritable) Test(org.junit.Test)

Example 10 with VarcharTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo in project hive by apache.

the class RegexSerDe method deserialize.

@Override
public Object deserialize(Writable blob) throws SerDeException {
    Text rowText = (Text) blob;
    Matcher m = inputPattern.matcher(rowText.toString());
    if (m.groupCount() != numColumns) {
        throw new SerDeException("Number of matching groups doesn't match the number of columns");
    }
    // If do not match, ignore the line, return a row with all nulls.
    if (!m.matches()) {
        unmatchedRowsCount++;
        if (!alreadyLoggedNoMatch) {
            // Report the row if its the first time
            LOG.warn("" + unmatchedRowsCount + " unmatched rows are found: " + rowText);
            alreadyLoggedNoMatch = true;
        }
        return null;
    }
    // Otherwise, return the row.
    for (int c = 0; c < numColumns; c++) {
        try {
            String t = m.group(c + 1);
            TypeInfo typeInfo = columnTypes.get(c);
            // Convert the column to the correct type when needed and set in row obj
            PrimitiveTypeInfo pti = (PrimitiveTypeInfo) typeInfo;
            switch(pti.getPrimitiveCategory()) {
                case STRING:
                    row.set(c, t);
                    break;
                case BYTE:
                    Byte b;
                    b = Byte.valueOf(t);
                    row.set(c, b);
                    break;
                case SHORT:
                    Short s;
                    s = Short.valueOf(t);
                    row.set(c, s);
                    break;
                case INT:
                    Integer i;
                    i = Integer.valueOf(t);
                    row.set(c, i);
                    break;
                case LONG:
                    Long l;
                    l = Long.valueOf(t);
                    row.set(c, l);
                    break;
                case FLOAT:
                    Float f;
                    f = Float.valueOf(t);
                    row.set(c, f);
                    break;
                case DOUBLE:
                    Double d;
                    d = Double.valueOf(t);
                    row.set(c, d);
                    break;
                case BOOLEAN:
                    Boolean bool;
                    bool = Boolean.valueOf(t);
                    row.set(c, bool);
                    break;
                case TIMESTAMP:
                    Timestamp ts;
                    ts = Timestamp.valueOf(t);
                    row.set(c, ts);
                    break;
                case DATE:
                    Date date;
                    date = Date.valueOf(t);
                    row.set(c, date);
                    break;
                case DECIMAL:
                    HiveDecimal bd = HiveDecimal.create(t);
                    row.set(c, bd);
                    break;
                case CHAR:
                    HiveChar hc = new HiveChar(t, ((CharTypeInfo) typeInfo).getLength());
                    row.set(c, hc);
                    break;
                case VARCHAR:
                    HiveVarchar hv = new HiveVarchar(t, ((VarcharTypeInfo) typeInfo).getLength());
                    row.set(c, hv);
                    break;
                default:
                    throw new SerDeException("Unsupported type " + typeInfo);
            }
        } catch (RuntimeException e) {
            partialMatchedRowsCount++;
            if (!alreadyLoggedPartialMatch) {
                // Report the row if its the first row
                LOG.warn("" + partialMatchedRowsCount + " partially unmatched rows are found, " + " cannot find group " + c + ": " + rowText);
                alreadyLoggedPartialMatch = true;
            }
            row.set(c, null);
        }
    }
    return row;
}
Also used : Matcher(java.util.regex.Matcher) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) Text(org.apache.hadoop.io.Text) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) Timestamp(java.sql.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Date(java.sql.Date) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal)

Aggregations

VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)26 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)18 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)18 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)13 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)12 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)12 Timestamp (java.sql.Timestamp)10 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)9 Text (org.apache.hadoop.io.Text)9 Test (org.junit.Test)9 HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable)8 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)8 Date (java.sql.Date)7 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)6 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)5 BytesWritable (org.apache.hadoop.io.BytesWritable)5 LongWritable (org.apache.hadoop.io.LongWritable)5 ArrayList (java.util.ArrayList)4 DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject)4 DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject)4