Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class GenericUDFInBloomFilter, method evaluate:
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  // Return null if either of the arguments is null
  if (arguments[0].get() == null || arguments[1].get() == null) {
    return null;
  }
  if (!initializedBloomFilter) {
    // Set up the bloom filter once
    InputStream in = null;
    try {
      BytesWritable bw = (BytesWritable) arguments[1].get();
      byte[] bytes = new byte[bw.getLength()];
      System.arraycopy(bw.getBytes(), 0, bytes, 0, bw.getLength());
      in = new NonSyncByteArrayInputStream(bytes);
      bloomFilter = BloomKFilter.deserialize(in);
    } catch (IOException e) {
      throw new HiveException(e);
    } finally {
      IOUtils.closeStream(in);
    }
    initializedBloomFilter = true;
  }
  // Check if the value is in the bloom filter
  switch (((PrimitiveObjectInspector) valObjectInspector).getTypeInfo().getPrimitiveCategory()) {
    case BOOLEAN:
      boolean vBoolean = ((BooleanObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vBoolean ? 1 : 0);
    case BYTE:
      byte vByte = ((ByteObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vByte);
    case SHORT:
      short vShort = ((ShortObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vShort);
    case INT:
      int vInt = ((IntObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vInt);
    case LONG:
      long vLong = ((LongObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vLong);
    case FLOAT:
      float vFloat = ((FloatObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testDouble(vFloat);
    case DOUBLE:
      double vDouble = ((DoubleObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testDouble(vDouble);
    case DECIMAL:
      HiveDecimalWritable vDecimal = ((HiveDecimalObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
      int startIdx = vDecimal.toBytes(scratchBuffer);
      return bloomFilter.testBytes(scratchBuffer, startIdx, scratchBuffer.length - startIdx);
    case DATE:
      DateWritableV2 vDate = ((DateObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
      return bloomFilter.testLong(vDate.getDays());
    case TIMESTAMP:
      Timestamp vTimeStamp = ((TimestampObjectInspector) valObjectInspector).getPrimitiveJavaObject(arguments[0].get());
      return bloomFilter.testLong(vTimeStamp.toEpochMilli());
    case CHAR:
      Text vChar = ((HiveCharObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get()).getStrippedValue();
      return bloomFilter.testBytes(vChar.getBytes(), 0, vChar.getLength());
    case VARCHAR:
      Text vVarchar = ((HiveVarcharObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get()).getTextValue();
      return bloomFilter.testBytes(vVarchar.getBytes(), 0, vVarchar.getLength());
    case STRING:
      Text vString = ((StringObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
      return bloomFilter.testBytes(vString.getBytes(), 0, vString.getLength());
    case BINARY:
      BytesWritable vBytes = ((BinaryObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
      return bloomFilter.testBytes(vBytes.getBytes(), 0, vBytes.getLength());
    default:
      // Cast to PrimitiveObjectInspector here; the original cast to PrimitiveTypeInfo
      // would itself fail with ClassCastException before the intended exception is thrown.
      throw new UDFArgumentTypeException(0, "Bad primitive category "
          + ((PrimitiveObjectInspector) valObjectInspector).getPrimitiveCategory());
  }
}
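The serialized filter consumed above is produced on the build side of the semijoin reduction. A minimal, self-contained sketch of that round trip, using only the BloomKFilter calls referenced in the method (the class name BloomKFilterRoundTrip and the sizing constant are illustrative, not from the Hive sources):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.hive.common.util.BloomKFilter;

public class BloomKFilterRoundTrip {
  public static void main(String[] args) throws IOException {
    // Build a filter sized for the expected number of distinct keys.
    BloomKFilter filter = new BloomKFilter(1000);
    filter.addLong(42L);

    // Serialize to bytes, the form the UDF receives as its second argument.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BloomKFilter.serialize(out, filter);

    // Deserialize and probe, mirroring the evaluate() path above.
    BloomKFilter restored =
        BloomKFilter.deserialize(new ByteArrayInputStream(out.toByteArray()));
    System.out.println(restored.testLong(42L)); // true
    System.out.println(restored.testLong(7L));  // false (with high probability)
  }
}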
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class GenericUDFDateAdd, method initialize:
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  if (arguments.length != 2) {
    throw new UDFArgumentLengthException("date_add() requires 2 arguments, got " + arguments.length);
  }
  if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
    throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
        + arguments[0].getTypeName() + " is passed as first argument");
  }
  if (arguments[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
    throw new UDFArgumentTypeException(1, "Only primitive type arguments are accepted but "
        + arguments[1].getTypeName() + " is passed as second argument");
  }
  inputType1 = ((PrimitiveObjectInspector) arguments[0]).getPrimitiveCategory();
  ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
  switch (inputType1) {
    case STRING:
    case VARCHAR:
    case CHAR:
      inputType1 = PrimitiveCategory.STRING;
      dateConverter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[0],
          PrimitiveObjectInspectorFactory.writableStringObjectInspector);
      break;
    case TIMESTAMP:
      dateConverter = new TimestampConverter((PrimitiveObjectInspector) arguments[0],
          PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
      break;
    case DATE:
      dateConverter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[0],
          PrimitiveObjectInspectorFactory.writableDateObjectInspector);
      break;
    default:
      throw new UDFArgumentException("DATE_ADD() only takes STRING/TIMESTAMP/DATEWRITABLE types as first argument, got " + inputType1);
  }
  inputType2 = ((PrimitiveObjectInspector) arguments[1]).getPrimitiveCategory();
  switch (inputType2) {
    case BYTE:
      daysConverter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[1],
          PrimitiveObjectInspectorFactory.writableByteObjectInspector);
      break;
    case SHORT:
      daysConverter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[1],
          PrimitiveObjectInspectorFactory.writableShortObjectInspector);
      break;
    case INT:
      daysConverter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[1],
          PrimitiveObjectInspectorFactory.writableIntObjectInspector);
      break;
    default:
      throw new UDFArgumentException("DATE_ADD() only takes TINYINT/SMALLINT/INT types as second argument, got " + inputType2);
  }
  return outputOI;
}
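To see the converters chosen here in action, one can drive the UDF directly, the way Hive's UDF unit tests do. A test-style sketch (the harness class and the sample values are illustrative, not from the listing):

import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateAdd;
import org.apache.hadoop.hive.serde2.io.DateWritableV2;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class DateAddSketch {
  public static void main(String[] args) throws Exception {
    GenericUDFDateAdd udf = new GenericUDFDateAdd();
    ObjectInspector[] ois = {
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaIntObjectInspector };
    udf.initialize(ois); // hits the STRING branch, so a string converter is installed

    DeferredObject[] deferred = {
        new DeferredJavaObject("2009-07-30"),
        new DeferredJavaObject(2) };
    DateWritableV2 result = (DateWritableV2) udf.evaluate(deferred);
    System.out.println(result); // 2009-08-01
  }
}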
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class GenericUDFDatetimeLegacyHybridCalendar, method initialize:
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  if (arguments.length < 1) {
    throw new UDFArgumentLengthException("The function datetime_legacy_hybrid_calendar requires at least one argument, got " + arguments.length);
  }
  try {
    inputOI = (PrimitiveObjectInspector) arguments[0];
    PrimitiveCategory pc = inputOI.getPrimitiveCategory();
    switch (pc) {
      case DATE:
        formatter = new SimpleDateFormat("yyyy-MM-dd");
        formatter.setLenient(false);
        formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
        converter = ObjectInspectorConverters.getConverter(inputOI,
            PrimitiveObjectInspectorFactory.writableDateObjectInspector);
        resultOI = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
        break;
      case TIMESTAMP:
        formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        formatter.setLenient(false);
        formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
        converter = ObjectInspectorConverters.getConverter(inputOI,
            PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
        resultOI = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
        break;
      default:
        throw new UDFArgumentException("datetime_legacy_hybrid_calendar only allows date or timestamp types");
    }
  } catch (ClassCastException e) {
    throw new UDFArgumentException("The function datetime_legacy_hybrid_calendar takes only primitive types");
  }
  return resultOI;
}
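The lenient=false, UTC-pinned SimpleDateFormat matters because java.util.Date formatting uses the hybrid Julian/Gregorian calendar, while Hive's newer date/time types are proleptic Gregorian, so old dates render differently. A JDK-only sketch of the discrepancy this UDF compensates for (the class name HybridCalendarDemo is illustrative):

import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.util.Date;
import java.util.TimeZone;

public class HybridCalendarDemo {
  public static void main(String[] args) {
    // Same configuration as the DATE branch above.
    SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
    formatter.setLenient(false);
    formatter.setTimeZone(TimeZone.getTimeZone("UTC"));

    // The instant of proleptic Gregorian 1582-10-10 at UTC midnight...
    long millis = LocalDate.of(1582, 10, 10).atStartOfDay(ZoneOffset.UTC)
        .toInstant().toEpochMilli();
    // ...falls before the 1582 cutover, so the hybrid calendar applies
    // Julian rules and prints 1582-09-30 (ten days earlier).
    System.out.println(formatter.format(new Date(millis)));
  }
}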
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class GenericUDFCastFormat, method initialize:
/**
 * @param arguments
 *   0. const int, value of a HiveParser_IdentifiersParser constant which represents a TOK_[TYPE]
 *   1. expression to convert
 *   2. constant string, format pattern
 *   3. (optional) constant int, output char/varchar length
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  if (arguments.length != 3 && arguments.length != 4) {
    throw new UDFArgumentException("Function cast_format requires 3 or 4 arguments (int, expression, StringLiteral[, var/char length]), got " + arguments.length);
  }
  outputOI = getOutputOI(arguments);
  try {
    inputOI = (PrimitiveObjectInspector) arguments[1];
  } catch (ClassCastException e) {
    throw new UDFArgumentException("Function CAST ... AS ... FORMAT ... takes only primitive types");
  }
  PrimitiveObjectInspectorUtils.PrimitiveGrouping inputPG =
      PrimitiveObjectInspectorUtils.getPrimitiveGrouping(inputOI.getPrimitiveCategory());
  PrimitiveObjectInspectorUtils.PrimitiveGrouping outputPG =
      PrimitiveObjectInspectorUtils.getPrimitiveGrouping(outputOI.getPrimitiveCategory());
  if (inputOI.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMPLOCALTZ) {
    throw new UDFArgumentException("Timestamp with local time zone not yet supported for cast ... format function");
  }
  if (!((inputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP
          && outputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP)
      || (inputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP
          && outputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP)
      || inputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.VOID_GROUP)) {
    // Report input with the input grouping and output with the output grouping
    // (the original message paired them the wrong way around).
    throw new UDFArgumentException("Function CAST ... AS ... FORMAT ... only converts datetime objects to string types"
        + " and string or void objects to datetime types. Input type provided: " + inputOI.getPrimitiveCategory()
        + " in primitive grouping " + inputPG + ", output type provided: " + outputOI.getPrimitiveCategory()
        + " in primitive grouping " + outputPG);
  }
  boolean forParsing = (outputPG == PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP);
  formatter = new HiveSqlDateTimeFormatter(getConstantStringValue(arguments, 2), forParsing);
  return outputOI;
}
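The guarded condition boils down to a rule on primitive groupings: string group to date group, date group to string group, or a void input. A standalone sketch of that rule (the helper class and method name are hypothetical), using the same PrimitiveObjectInspectorUtils.getPrimitiveGrouping call as the method above:

import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;

public class CastFormatGroupingCheck {
  // Returns true when CAST ... FORMAT would accept this input/output pair.
  static boolean supported(PrimitiveCategory in, PrimitiveCategory out) {
    PrimitiveGrouping inPG = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(in);
    PrimitiveGrouping outPG = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(out);
    return (inPG == PrimitiveGrouping.STRING_GROUP && outPG == PrimitiveGrouping.DATE_GROUP)
        || (inPG == PrimitiveGrouping.DATE_GROUP && outPG == PrimitiveGrouping.STRING_GROUP)
        || inPG == PrimitiveGrouping.VOID_GROUP;
  }

  public static void main(String[] args) {
    System.out.println(supported(PrimitiveCategory.STRING, PrimitiveCategory.TIMESTAMP)); // true
    System.out.println(supported(PrimitiveCategory.DATE, PrimitiveCategory.VARCHAR));     // true
    System.out.println(supported(PrimitiveCategory.INT, PrimitiveCategory.DATE));         // false
  }
}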
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE in project hive by apache.
The class GenericUDFDecode, method initialize:
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  if (arguments.length != 2) {
    throw new UDFArgumentLengthException("Decode() requires exactly two arguments");
  }
  if (arguments[0].getCategory() != Category.PRIMITIVE) {
    throw new UDFArgumentTypeException(0, "The first argument to Decode() must be primitive");
  }
  PrimitiveCategory category = ((PrimitiveObjectInspector) arguments[0]).getPrimitiveCategory();
  if (category == PrimitiveCategory.BINARY) {
    bytesOI = (BinaryObjectInspector) arguments[0];
  } else if (category == PrimitiveCategory.VOID) {
    bytesOI = (VoidObjectInspector) arguments[0];
  } else {
    throw new UDFArgumentTypeException(0, "The first argument to Decode() must be binary");
  }
  if (arguments[1].getCategory() != Category.PRIMITIVE) {
    throw new UDFArgumentTypeException(1, "The second argument to Decode() must be primitive");
  }
  charsetOI = (PrimitiveObjectInspector) arguments[1];
  if (PrimitiveGrouping.STRING_GROUP != PrimitiveObjectInspectorUtils.getPrimitiveGrouping(charsetOI.getPrimitiveCategory())) {
    throw new UDFArgumentTypeException(1, "The second argument to Decode() must be from string group");
  }
  // If the character set for decoding is constant, we can optimize that
  if (arguments[1] instanceof ConstantObjectInspector) {
    String charSetName = ((ConstantObjectInspector) arguments[1]).getWritableConstantValue().toString();
    decoder = Charset.forName(charSetName).newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);
  }
  return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
}
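The decoder built in the constant-charset branch is a strict JDK CharsetDecoder: CodingErrorAction.REPORT makes malformed or unmappable input raise an exception instead of being silently replaced with U+FFFD. A JDK-only sketch of that behavior (the class name StrictDecodeDemo is illustrative):

import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;

public class StrictDecodeDemo {
  public static void main(String[] args) throws CharacterCodingException {
    // Same configuration as the initialize() fast path above.
    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);

    // Valid UTF-8 decodes normally.
    System.out.println(decoder.decode(
        ByteBuffer.wrap("héllo".getBytes(StandardCharsets.UTF_8))));

    // An invalid sequence throws instead of yielding a replacement character.
    try {
      decoder.decode(ByteBuffer.wrap(new byte[] { (byte) 0xC3, (byte) 0x28 }));
    } catch (CharacterCodingException e) {
      System.out.println("rejected: " + e);
    }
  }
}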