Use of org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector in project hive by apache.
The class GenericUDFInBloomFilter, method evaluate:
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  // Return if either of the arguments is null
  if (arguments[0].get() == null || arguments[1].get() == null) {
    return null;
  }
  if (!initializedBloomFilter) {
    // Setup the bloom filter once
    InputStream in = null;
    try {
      BytesWritable bw = (BytesWritable) arguments[1].get();
      byte[] bytes = new byte[bw.getLength()];
      System.arraycopy(bw.getBytes(), 0, bytes, 0, bw.getLength());
      in = new NonSyncByteArrayInputStream(bytes);
      bloomFilter = BloomKFilter.deserialize(in);
    } catch (IOException e) {
      throw new HiveException(e);
    } finally {
      IOUtils.closeStream(in);
    }
    initializedBloomFilter = true;
  }
  // Check if the value is in bloom filter
  switch (((PrimitiveObjectInspector) valObjectInspector).getTypeInfo().getPrimitiveCategory()) {
    case BOOLEAN:
      boolean vBoolean = ((BooleanObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vBoolean ? 1 : 0);
    case BYTE:
      byte vByte = ((ByteObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vByte);
    case SHORT:
      short vShort = ((ShortObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vShort);
    case INT:
      int vInt = ((IntObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vInt);
    case LONG:
      long vLong = ((LongObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vLong);
    case FLOAT:
      float vFloat = ((FloatObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testDouble(vFloat);
    case DOUBLE:
      double vDouble = ((DoubleObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testDouble(vDouble);
    case DECIMAL:
      HiveDecimalWritable vDecimal = ((HiveDecimalObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
      int startIdx = vDecimal.toBytes(scratchBuffer);
      return bloomFilter.testBytes(scratchBuffer, startIdx, scratchBuffer.length - startIdx);
    case DATE:
      DateWritableV2 vDate = ((DateObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
      return bloomFilter.testLong(vDate.getDays());
    case TIMESTAMP:
      Timestamp vTimeStamp = ((TimestampObjectInspector) valObjectInspector).getPrimitiveJavaObject(arguments[0].get());
      return bloomFilter.testLong(vTimeStamp.toEpochMilli());
    case CHAR:
      Text vChar = ((HiveCharObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get()).getStrippedValue();
      return bloomFilter.testBytes(vChar.getBytes(), 0, vChar.getLength());
    case VARCHAR:
      Text vVarchar = ((HiveVarcharObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get()).getTextValue();
      return bloomFilter.testBytes(vVarchar.getBytes(), 0, vVarchar.getLength());
    case STRING:
      Text vString = ((StringObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
      return bloomFilter.testBytes(vString.getBytes(), 0, vString.getLength());
    case BINARY:
      BytesWritable vBytes = ((BinaryObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
      return bloomFilter.testBytes(vBytes.getBytes(), 0, vBytes.getLength());
    default:
      throw new UDFArgumentTypeException(0, "Bad primitive category " + ((PrimitiveTypeInfo) valObjectInspector).getPrimitiveCategory());
  }
}
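The method above deserializes the bloom filter once and then routes each primitive category to a testLong/testDouble/testBytes probe. Below is a minimal, self-contained sketch of that round trip; it is illustrative code (class name invented), not taken from the Hive sources above, and assumes only hive-storage-api's BloomKFilter on the classpath.
// Illustrative sketch; assumes org.apache.hive:hive-storage-api.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.hive.common.util.BloomKFilter;

public class BloomKFilterRoundTrip {
  public static void main(String[] args) throws IOException {
    // Build side: populate a filter sized for the expected number of entries.
    BloomKFilter build = new BloomKFilter(1000);
    build.addLong(42L);

    // Ship it as bytes, the way the UDF receives it in arguments[1].
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BloomKFilter.serialize(out, build);

    // Probe side: deserialize once, then test candidate values.
    BloomKFilter probe = BloomKFilter.deserialize(new ByteArrayInputStream(out.toByteArray()));
    System.out.println(probe.testLong(42L)); // true
    System.out.println(probe.testLong(7L));  // false with high probability
  }
}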
Use of org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector in project hive by apache.
The class GenericUDFFromUnixTime, method initialize:
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  checkArgsSize(arguments, 1, 2);
  for (int i = 0; i < arguments.length; i++) {
    checkArgPrimitive(arguments, i);
  }
  PrimitiveObjectInspector arg0OI = (PrimitiveObjectInspector) arguments[0];
  switch (arg0OI.getPrimitiveCategory()) {
    case INT:
      inputIntOI = (IntObjectInspector) arguments[0];
      break;
    case LONG:
      inputLongOI = (LongObjectInspector) arguments[0];
      break;
    default:
      throw new UDFArgumentException("The function from_unixtime takes only int/long types for first argument. Got Type:" + arg0OI.getPrimitiveCategory().name());
  }
  if (arguments.length == 2) {
    checkArgGroups(arguments, 1, inputTypes, STRING_GROUP);
    obtainStringConverter(arguments, 1, inputTypes, converters);
  }
  if (timeZone == null) {
    timeZone = SessionState.get() == null ? new HiveConf().getLocalTimeZone() : SessionState.get().getConf().getLocalTimeZone();
    FORMATTER.withZone(timeZone);
  }
  return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
}
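initialize accepts either an int or a bigint first argument by keeping the matching object inspector. A minimal sketch (illustrative class name, not Hive source) of the IntObjectInspector contract these snippets rely on, for both Writable-backed and Java-backed values:
// Illustrative sketch of IntObjectInspector.get for both value shapes.
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;

public class IntInspectorSketch {
  public static void main(String[] args) {
    // Writable-backed inspector: the row object is an IntWritable.
    IntObjectInspector writableOI = PrimitiveObjectInspectorFactory.writableIntObjectInspector;
    int fromWritable = writableOI.get(new IntWritable(42));

    // Java-backed inspector: the row object is a java.lang.Integer.
    IntObjectInspector javaOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    int fromJava = javaOI.get(Integer.valueOf(42));

    System.out.println(fromWritable + " " + fromJava); // 42 42
  }
}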
Use of org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector in project hive by apache.
The class KuduSerDe, method serialize:
/**
 * Serialize an object by navigating inside the Object with the ObjectInspector.
 */
@Override
public KuduWritable serialize(Object obj, ObjectInspector objectInspector) throws SerDeException {
  Preconditions.checkArgument(objectInspector.getCategory() == Category.STRUCT);
  StructObjectInspector soi = (StructObjectInspector) objectInspector;
  List<Object> writableObj = soi.getStructFieldsDataAsList(obj);
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  PartialRow row = schema.newPartialRow();
  for (int i = 0; i < schema.getColumnCount(); i++) {
    StructField field = fields.get(i);
    Object value = writableObj.get(i);
    if (value == null) {
      row.setNull(i);
    } else {
      Type type = schema.getColumnByIndex(i).getType();
      ObjectInspector inspector = field.getFieldObjectInspector();
      switch (type) {
        case BOOL:
          boolean boolVal = ((BooleanObjectInspector) inspector).get(value);
          row.addBoolean(i, boolVal);
          break;
        case INT8:
          byte byteVal = ((ByteObjectInspector) inspector).get(value);
          row.addByte(i, byteVal);
          break;
        case INT16:
          short shortVal = ((ShortObjectInspector) inspector).get(value);
          row.addShort(i, shortVal);
          break;
        case INT32:
          int intVal = ((IntObjectInspector) inspector).get(value);
          row.addInt(i, intVal);
          break;
        case INT64:
          long longVal = ((LongObjectInspector) inspector).get(value);
          row.addLong(i, longVal);
          break;
        case UNIXTIME_MICROS:
          // Calling toSqlTimestamp and using the addTimestamp API ensures we properly
          // convert Hive localDateTime to UTC.
          java.sql.Timestamp timestampVal = ((TimestampObjectInspector) inspector).getPrimitiveJavaObject(value).toSqlTimestamp();
          row.addTimestamp(i, timestampVal);
          break;
        case DECIMAL:
          HiveDecimal decimalVal = ((HiveDecimalObjectInspector) inspector).getPrimitiveJavaObject(value);
          row.addDecimal(i, decimalVal.bigDecimalValue());
          break;
        case FLOAT:
          float floatVal = ((FloatObjectInspector) inspector).get(value);
          row.addFloat(i, floatVal);
          break;
        case DOUBLE:
          double doubleVal = ((DoubleObjectInspector) inspector).get(value);
          row.addDouble(i, doubleVal);
          break;
        case STRING:
          String stringVal = ((StringObjectInspector) inspector).getPrimitiveJavaObject(value);
          row.addString(i, stringVal);
          break;
        case BINARY:
          byte[] bytesVal = ((BinaryObjectInspector) inspector).getPrimitiveJavaObject(value);
          row.addBinary(i, bytesVal);
          break;
        default:
          throw new SerDeException("Unsupported column type: " + type.name());
      }
    }
  }
  return new KuduWritable(row);
}
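serialize never touches the row object directly; it walks the struct through the ObjectInspector hierarchy and reads each primitive via its field inspector. A minimal sketch of that pattern using the standard Java inspectors (illustrative class and field names, not the Kudu-specific code above):
// Illustrative sketch; only Hive's standard Java object inspectors are used.
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;

public class StructWalkSketch {
  public static void main(String[] args) {
    // A struct<id:int,name:string> described by inspectors over plain Java objects.
    StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("id", "name"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    Object row = Arrays.<Object>asList(7, "kudu");

    // Same navigation as serialize above: field refs plus per-field inspectors.
    List<Object> data = soi.getStructFieldsDataAsList(row);
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    int id = ((IntObjectInspector) fields.get(0).getFieldObjectInspector()).get(data.get(0));
    String name = ((StringObjectInspector) fields.get(1).getFieldObjectInspector())
        .getPrimitiveJavaObject(data.get(1));
    System.out.println(id + " " + name); // 7 kudu
  }
}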
Use of org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector in project hive by apache.
The class FixAcidKeyIndex, method validate:
public static AcidKeyIndexValidationResult validate(Configuration conf, Path inputPath) throws IOException {
  AcidKeyIndexValidationResult result = new AcidKeyIndexValidationResult();
  FileSystem fs = inputPath.getFileSystem(conf);
  try (Reader reader = OrcFile.createReader(fs, inputPath);
       RecordReader rr = reader.rows()) {
    List<StripeInformation> stripes = reader.getStripes();
    RecordIdentifier[] keyIndex = OrcRecordUpdater.parseKeyIndex(reader);
    StructObjectInspector soi = (StructObjectInspector) reader.getObjectInspector();
    // struct<operation:int,originalTransaction:bigint,bucket:int,rowId:bigint,currentTransaction:bigint
    List<? extends StructField> structFields = soi.getAllStructFieldRefs();
    StructField transactionField = structFields.get(1);
    LongObjectInspector transactionOI = (LongObjectInspector) transactionField.getFieldObjectInspector();
    StructField bucketField = structFields.get(2);
    IntObjectInspector bucketOI = (IntObjectInspector) bucketField.getFieldObjectInspector();
    StructField rowIdField = structFields.get(3);
    LongObjectInspector rowIdOI = (LongObjectInspector) rowIdField.getFieldObjectInspector();
    long rowsProcessed = 0;
    for (int i = 0; i < stripes.size(); i++) {
      rowsProcessed += stripes.get(i).getNumberOfRows();
      rr.seekToRow(rowsProcessed - 1);
      OrcStruct row = (OrcStruct) rr.next(null);
      long lastTransaction = transactionOI.get(soi.getStructFieldData(row, transactionField));
      int lastBucket = bucketOI.get(soi.getStructFieldData(row, bucketField));
      long lastRowId = rowIdOI.get(soi.getStructFieldData(row, rowIdField));
      RecordIdentifier recordIdentifier = new RecordIdentifier(lastTransaction, lastBucket, lastRowId);
      result.recordIdentifiers.add(recordIdentifier);
      if (stripes.size() != keyIndex.length || keyIndex[i] == null || recordIdentifier.compareTo(keyIndex[i]) != 0) {
        result.isValid = false;
      }
    }
  }
  return result;
}
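The validator reads the (transaction, bucket, rowId) triple from the last row of each stripe through the Long/Int object inspectors and compares it to the corresponding key index entry. A minimal sketch of just that comparison (illustrative values, not Hive source):
// Illustrative sketch; only the RecordIdentifier comparison is shown.
import org.apache.hadoop.hive.ql.io.RecordIdentifier;

public class KeyIndexCompareSketch {
  public static void main(String[] args) {
    // Triple read from the last row of a stripe via the object inspectors.
    RecordIdentifier fromStripe = new RecordIdentifier(5L, 1, 99L);
    // Corresponding entry parsed from the ORC key index.
    RecordIdentifier fromKeyIndex = new RecordIdentifier(5L, 1, 99L);
    // compareTo == 0 means the index entry agrees with the data; any mismatch
    // marks the file as invalid, as in the loop above.
    System.out.println(fromStripe.compareTo(fromKeyIndex) == 0); // true
  }
}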
Use of org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector in project hive by apache.
The class FileSinkOperator, method initializeOp:
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  try {
    this.hconf = hconf;
    filesCreated = false;
    isTemporary = conf.isTemporary();
    multiFileSpray = conf.isMultiFileSpray();
    this.isBucketed = hconf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0) > 0;
    totalFiles = conf.getTotalFiles();
    numFiles = conf.getNumFiles();
    dpCtx = conf.getDynPartCtx();
    lbCtx = conf.getLbCtx();
    fsp = prevFsp = null;
    valToPaths = new HashMap<String, FSPaths>();
    taskId = originalTaskId = Utilities.getTaskId(hconf);
    initializeSpecPath();
    fs = specPath.getFileSystem(hconf);
    if (hconf instanceof JobConf) {
      jc = (JobConf) hconf;
    } else {
      // test code path
      jc = new JobConf(hconf);
    }
    try {
      createHiveOutputFormat(jc);
    } catch (HiveException ex) {
      logOutputFormatError(hconf, ex);
      throw ex;
    }
    isCompressed = conf.getCompressed();
    if (conf.isLinkedFileSink() && conf.isDirectInsert()) {
      parent = Utilities.toTempPath(conf.getFinalDirName());
    } else {
      parent = Utilities.toTempPath(conf.getDirName());
    }
    statsFromRecordWriter = new boolean[numFiles];
    AbstractSerDe serde = conf.getTableInfo().getSerDeClass().newInstance();
    serde.initialize(unsetNestedColumnPaths(hconf), conf.getTableInfo().getProperties(), null);
    serializer = serde;
    outputClass = serializer.getSerializedClass();
    destTablePath = conf.getDestPath();
    isInsertOverwrite = conf.getInsertOverwrite();
    counterGroup = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVECOUNTERGROUP);
    LOG.info("Using serializer : " + serializer + " and formatter : " + hiveOutputFormat + (isCompressed ? " with compression" : ""));
    // Timeout is chosen to make sure that even if one iteration takes more than
    // half of the script.timeout but less than script.timeout, we will still
    // be able to report progress.
    timeOut = hconf.getInt("mapred.healthChecker.script.timeout", 600000) / 2;
    if (multiFileSpray) {
      partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
      int i = 0;
      for (ExprNodeDesc e : conf.getPartitionCols()) {
        partitionEval[i++] = ExprNodeEvaluatorFactory.get(e);
      }
      partitionObjectInspectors = initEvaluators(partitionEval, outputObjInspector);
      prtner = (HivePartitioner<HiveKey, Object>) ReflectionUtils.newInstance(jc.getPartitionerClass(), null);
    }
    if (dpCtx != null && !inspectPartitionValues()) {
      dpSetup();
    }
    if (lbCtx != null) {
      lbSetup();
    }
    if (!bDynParts) {
      fsp = new FSPaths(specPath, conf.isMmTable(), conf.isDirectInsert(), conf.getInsertOverwrite(), conf.getAcidOperation());
      fsp.subdirAfterTxn = combinePathFragments(generateListBucketingDirName(null), unionPath);
      if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
        Utilities.FILE_OP_LOGGER.trace("creating new paths " + System.identityHashCode(fsp) + " from ctor; childSpec " + unionPath + ": tmpPath " + fsp.buildTmpPath() + ", task path " + fsp.buildTaskOutputTempPath());
      }
      // createBucketFiles(fsp);
      if (!this.isSkewedStoredAsSubDirectories) {
        // special entry for non-DP case
        valToPaths.put("", fsp);
      }
    }
    final StoragePolicyValue tmpStorage = StoragePolicyValue.lookup(HiveConf.getVar(hconf, HIVE_TEMPORARY_TABLE_STORAGE));
    if (isTemporary && fsp != null && tmpStorage != StoragePolicyValue.DEFAULT) {
      // Not supported for temp tables.
      assert !conf.isMmTable();
      final Path outputPath = fsp.buildTaskOutputTempPath();
      StoragePolicyShim shim = ShimLoader.getHadoopShims().getStoragePolicyShim(fs);
      if (shim != null) {
        // directory creation is otherwise within the writers
        fs.mkdirs(outputPath);
        shim.setStoragePolicy(outputPath, tmpStorage);
      }
    }
    if (conf.getWriteType() == AcidUtils.Operation.UPDATE || conf.getWriteType() == AcidUtils.Operation.DELETE) {
      // ROW__ID is always in the first field
      recIdField = ((StructObjectInspector) outputObjInspector).getAllStructFieldRefs().get(0);
      recIdInspector = (StructObjectInspector) recIdField.getFieldObjectInspector();
      // bucket is the second field in the record id
      bucketField = recIdInspector.getAllStructFieldRefs().get(1);
      bucketInspector = (IntObjectInspector) bucketField.getFieldObjectInspector();
    }
    numRows = 0;
    cntr = 1;
    logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);
    statsMap.put(getCounterName(Counter.RECORDS_OUT), row_count);
    // Setup hashcode
    hashFunc = conf.getTableInfo().getBucketingVersion() == 2 ? ObjectInspectorUtils::getBucketHashCode : ObjectInspectorUtils::getBucketHashCodeOld;
    // This count is used to get total number of rows in an insert query.
    if (conf.getTableInfo() != null && conf.getTableInfo().getTableName() != null) {
      statsMap.put(TOTAL_TABLE_ROWS_WRITTEN, row_count);
    }
  } catch (HiveException e) {
    throw e;
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
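Near the end, initializeOp selects the bucket hash function by bucketing version using method references. A minimal, standalone sketch of that selection (illustrative class name and version value, not the FileSinkOperator code itself):
// Illustrative sketch of the hashFunc assignment pattern shown above.
import java.util.function.BiFunction;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class BucketHashSketch {
  public static void main(String[] args) {
    int bucketingVersion = 2; // would come from the table's bucketing version
    BiFunction<Object[], ObjectInspector[], Integer> hashFunc =
        bucketingVersion == 2 ? ObjectInspectorUtils::getBucketHashCode
                              : ObjectInspectorUtils::getBucketHashCodeOld;

    Object[] keys = { Integer.valueOf(123) };
    ObjectInspector[] ois = { PrimitiveObjectInspectorFactory.javaIntObjectInspector };
    System.out.println(hashFunc.apply(keys, ois));
  }
}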