Example usage of org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo in the Apache Hive project.
From the class VectorRandomRowSource, method randomObject:
/**
 * Generates a random test value for the given column, based on that column's
 * primitive category and type info.
 *
 * @param column              index into the parallel arrays below
 * @param r                   random source
 * @param primitiveCategories per-column primitive categories
 * @param primitiveTypeInfos  per-column type infos (consulted for CHAR/VARCHAR length
 *                            and DECIMAL precision/scale)
 * @param alphabets           optional per-column alphabets for random strings; may be null
 * @param addEscapables       when true, injects 1-2 characters from needsEscapeStr into
 *                            non-empty string results
 * @param needsEscapeStr      pool of characters that require escaping (used only when
 *                            addEscapables is true)
 * @return a randomly generated object of the column's Java type
 * @throws Error on an unrecognized primitive category
 */
public static Object randomObject(int column, Random r, PrimitiveCategory[] primitiveCategories, PrimitiveTypeInfo[] primitiveTypeInfos, String[] alphabets, boolean addEscapables, String needsEscapeStr) {
    PrimitiveCategory primitiveCategory = primitiveCategories[column];
    PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
    try {
        switch(primitiveCategory) {
        case BOOLEAN:
            // BUG FIX: the original used r.nextInt(1) == 1, but nextInt(1) always
            // returns 0, so the result was always FALSE. nextInt(2) yields 0 or 1
            // with equal probability.
            return Boolean.valueOf(r.nextInt(2) == 1);
        case BYTE:
            return Byte.valueOf((byte) r.nextInt());
        case SHORT:
            return Short.valueOf((short) r.nextInt());
        case INT:
            return Integer.valueOf(r.nextInt());
        case LONG:
            return Long.valueOf(r.nextLong());
        case DATE:
            return RandomTypeUtil.getRandDate(r);
        case FLOAT:
            // Uniform in [-5, 5).
            return Float.valueOf(r.nextFloat() * 10 - 5);
        case DOUBLE:
            return Double.valueOf(r.nextDouble() * 10 - 5);
        case STRING:
        case CHAR:
        case VARCHAR:
        {
            String result;
            if (alphabets != null && alphabets[column] != null) {
                result = RandomTypeUtil.getRandString(r, alphabets[column], r.nextInt(10));
            } else {
                result = RandomTypeUtil.getRandString(r);
            }
            if (addEscapables && result.length() > 0) {
                // Splice 1-2 escape-needing characters into random positions.
                int escapeCount = 1 + r.nextInt(2);
                for (int i = 0; i < escapeCount; i++) {
                    int index = r.nextInt(result.length());
                    String begin = result.substring(0, index);
                    String end = result.substring(index);
                    char needsEscapeChar = needsEscapeStr.charAt(r.nextInt(needsEscapeStr.length()));
                    result = begin + needsEscapeChar + end;
                }
            }
            // CHAR/VARCHAR wrap the string, truncating to the declared length.
            switch(primitiveCategory) {
            case STRING:
                return result;
            case CHAR:
                return new HiveChar(result, ((CharTypeInfo) primitiveTypeInfo).getLength());
            case VARCHAR:
                return new HiveVarchar(result, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
            default:
                throw new Error("Unknown primitive category " + primitiveCategory);
            }
        }
        case BINARY:
            return getRandBinary(r, 1 + r.nextInt(100));
        case TIMESTAMP:
            return RandomTypeUtil.getRandTimestamp(r);
        case INTERVAL_YEAR_MONTH:
            return getRandIntervalYearMonth(r);
        case INTERVAL_DAY_TIME:
            return getRandIntervalDayTime(r);
        case DECIMAL:
            return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
        default:
            throw new Error("Unknown primitive category " + primitiveCategory);
        }
    } catch (Exception e) {
        // Wrap with column/type context so failures in random generation are traceable.
        throw new RuntimeException("randomObject failed on column " + column + " type " + primitiveCategory, e);
    }
}
Example usage of org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo in the Apache Hive project.
From the class TestGenericUDFOPPositive, method testVarchar:
@Test
public void testVarchar() throws HiveException {
    // Unary plus on a varchar input: the UDF should coerce the string to DOUBLE
    // and return the numeric value unchanged.
    GenericUDFOPPositive udf = new GenericUDFOPPositive();
    HiveVarchar vc = new HiveVarchar("32300.004747", 12);
    HiveVarcharWritable input = new HiveVarcharWritable(vc);
    VarcharTypeInfo inputTypeInfo = TypeInfoFactory.getVarcharTypeInfo(12);
    ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(inputTypeInfo) };
    DeferredObject[] args = { new DeferredJavaObject(input) };
    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    // Output type must be DOUBLE regardless of the varchar input type.
    Assert.assertEquals(TypeInfoFactory.doubleTypeInfo, oi.getTypeInfo());
    DoubleWritable res = (DoubleWritable) udf.evaluate(args);
    // Use the primitive-double overload with an explicit delta instead of the
    // deprecated boxing constructor new Double(...). Delta 0.0 preserves the
    // original exact-equality semantics of Double.equals.
    Assert.assertEquals(32300.004747, res.get(), 0.0);
}
Example usage of org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo in the Apache Hive project.
From the class TestObjectInspectorConverters, method testGetConvertedOI:
public void testGetConvertedOI() throws Throwable {
    // Exercise getConvertedOI with parameterized types: converting from
    // varchar(10) to varchar(5) must carry the OUTPUT type's length parameter.
    PrimitiveTypeInfo sourceTypeInfo = (PrimitiveTypeInfo) TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)");
    PrimitiveTypeInfo targetTypeInfo = (PrimitiveTypeInfo) TypeInfoFactory.getPrimitiveTypeInfo("varchar(5)");
    PrimitiveObjectInspector sourceOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(sourceTypeInfo);
    PrimitiveObjectInspector targetOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(targetTypeInfo);
    // The converted OI should reflect the target's varchar length, not the source's.
    PrimitiveObjectInspector convertedOI = (PrimitiveObjectInspector) ObjectInspectorConverters.getConvertedOI(sourceOI, targetOI);
    VarcharTypeInfo resultTypeInfo = (VarcharTypeInfo) convertedOI.getTypeInfo();
    assertEquals("varchar length doesn't match", 5, resultTypeInfo.getLength());
}
Example usage of org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo in the Apache Hive project.
From the class TestGenericUDFCeil, method testVarchar:
@Test
public void testVarchar() throws HiveException {
    // CEIL of a varchar input: the string is coerced to a number and the
    // result is returned as a LONG.
    GenericUDFCeil udf = new GenericUDFCeil();
    HiveVarcharWritable input = new HiveVarcharWritable(new HiveVarchar("32300.004747", 12));
    VarcharTypeInfo varcharTypeInfo = TypeInfoFactory.getVarcharTypeInfo(12);
    ObjectInspector varcharOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(varcharTypeInfo);
    ObjectInspector[] inputOIs = { varcharOI };
    DeferredObject[] args = { new DeferredJavaObject(input) };
    PrimitiveObjectInspector outputOI = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    // Output type must be LONG for a ceil over a varchar input.
    Assert.assertEquals(TypeInfoFactory.longTypeInfo, outputOI.getTypeInfo());
    LongWritable result = (LongWritable) udf.evaluate(args);
    // ceil(32300.004747) == 32301
    Assert.assertEquals(32301L, result.get());
}
Example usage of org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo in the Apache Hive project.
From the class RegexSerDe, method deserialize:
@Override
public Object deserialize(Writable blob) throws SerDeException {
    // Each input row is a line of text matched against the configured regex;
    // capture group i+1 supplies column i.
    Text rowText = (Text) blob;
    Matcher matcher = inputPattern.matcher(rowText.toString());
    if (matcher.groupCount() != numColumns) {
        throw new SerDeException("Number of matching groups doesn't match the number of columns");
    }
    if (!matcher.matches()) {
        // Non-matching line: count it, warn only on the first occurrence,
        // and signal an all-null row by returning null.
        unmatchedRowsCount++;
        if (!alreadyLoggedNoMatch) {
            LOG.warn("" + unmatchedRowsCount + " unmatched rows are found: " + rowText);
            alreadyLoggedNoMatch = true;
        }
        return null;
    }
    // Convert each captured group to its declared column type.
    for (int col = 0; col < numColumns; col++) {
        try {
            String fieldText = matcher.group(col + 1);
            TypeInfo typeInfo = columnTypes.get(col);
            PrimitiveTypeInfo pti = (PrimitiveTypeInfo) typeInfo;
            switch(pti.getPrimitiveCategory()) {
            case STRING:
                row.set(col, fieldText);
                break;
            case BYTE:
                row.set(col, Byte.valueOf(fieldText));
                break;
            case SHORT:
                row.set(col, Short.valueOf(fieldText));
                break;
            case INT:
                row.set(col, Integer.valueOf(fieldText));
                break;
            case LONG:
                row.set(col, Long.valueOf(fieldText));
                break;
            case FLOAT:
                row.set(col, Float.valueOf(fieldText));
                break;
            case DOUBLE:
                row.set(col, Double.valueOf(fieldText));
                break;
            case BOOLEAN:
                row.set(col, Boolean.valueOf(fieldText));
                break;
            case TIMESTAMP:
                row.set(col, Timestamp.valueOf(fieldText));
                break;
            case DATE:
                row.set(col, Date.valueOf(fieldText));
                break;
            case DECIMAL:
                row.set(col, HiveDecimal.create(fieldText));
                break;
            case CHAR:
                // CHAR/VARCHAR enforce the declared length on construction.
                row.set(col, new HiveChar(fieldText, ((CharTypeInfo) typeInfo).getLength()));
                break;
            case VARCHAR:
                row.set(col, new HiveVarchar(fieldText, ((VarcharTypeInfo) typeInfo).getLength()));
                break;
            default:
                throw new SerDeException("Unsupported type " + typeInfo);
            }
        } catch (RuntimeException e) {
            // Conversion failed (e.g. NumberFormatException): count the partial
            // match, warn only on the first occurrence, and null out this column.
            partialMatchedRowsCount++;
            if (!alreadyLoggedPartialMatch) {
                LOG.warn("" + partialMatchedRowsCount + " partially unmatched rows are found, " + " cannot find group " + col + ": " + rowText);
                alreadyLoggedPartialMatch = true;
            }
            row.set(col, null);
        }
    }
    return row;
}
Aggregations