Use of org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector in project hive by apache.
The class GenericUDFInBloomFilter, method evaluate:
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  // Return if either of the arguments is null
  if (arguments[0].get() == null || arguments[1].get() == null) {
    return null;
  }
  if (!initializedBloomFilter) {
    // Setup the bloom filter once
    InputStream in = null;
    try {
      BytesWritable bw = (BytesWritable) arguments[1].get();
      byte[] bytes = new byte[bw.getLength()];
      System.arraycopy(bw.getBytes(), 0, bytes, 0, bw.getLength());
      in = new NonSyncByteArrayInputStream(bytes);
      bloomFilter = BloomKFilter.deserialize(in);
    } catch (IOException e) {
      throw new HiveException(e);
    } finally {
      IOUtils.closeStream(in);
    }
    initializedBloomFilter = true;
  }
  // Check if the value is in bloom filter
  switch (((PrimitiveObjectInspector) valObjectInspector).getTypeInfo().getPrimitiveCategory()) {
    case BOOLEAN:
      boolean vBoolean = ((BooleanObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vBoolean ? 1 : 0);
    case BYTE:
      byte vByte = ((ByteObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vByte);
    case SHORT:
      short vShort = ((ShortObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vShort);
    case INT:
      int vInt = ((IntObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vInt);
    case LONG:
      long vLong = ((LongObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testLong(vLong);
    case FLOAT:
      float vFloat = ((FloatObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testDouble(vFloat);
    case DOUBLE:
      double vDouble = ((DoubleObjectInspector) valObjectInspector).get(arguments[0].get());
      return bloomFilter.testDouble(vDouble);
    case DECIMAL:
      HiveDecimalWritable vDecimal = ((HiveDecimalObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
      int startIdx = vDecimal.toBytes(scratchBuffer);
      return bloomFilter.testBytes(scratchBuffer, startIdx, scratchBuffer.length - startIdx);
    case DATE:
      DateWritableV2 vDate = ((DateObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
      return bloomFilter.testLong(vDate.getDays());
    case TIMESTAMP:
      Timestamp vTimeStamp = ((TimestampObjectInspector) valObjectInspector).getPrimitiveJavaObject(arguments[0].get());
      return bloomFilter.testLong(vTimeStamp.toEpochMilli());
    case CHAR:
      Text vChar = ((HiveCharObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get()).getStrippedValue();
      return bloomFilter.testBytes(vChar.getBytes(), 0, vChar.getLength());
    case VARCHAR:
      Text vVarchar = ((HiveVarcharObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get()).getTextValue();
      return bloomFilter.testBytes(vVarchar.getBytes(), 0, vVarchar.getLength());
    case STRING:
      Text vString = ((StringObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
      return bloomFilter.testBytes(vString.getBytes(), 0, vString.getLength());
    case BINARY:
      BytesWritable vBytes = ((BinaryObjectInspector) valObjectInspector).getPrimitiveWritableObject(arguments[0].get());
      return bloomFilter.testBytes(vBytes.getBytes(), 0, vBytes.getLength());
    default:
      throw new UDFArgumentTypeException(0, "Bad primitive category " + ((PrimitiveTypeInfo) valObjectInspector).getPrimitiveCategory());
  }
}
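The method above deserializes the bloom filter once and then routes each primitive category to a testLong/testDouble/testBytes probe. Below is a minimal, self-contained sketch of that round trip; it is illustrative code (class name invented), not taken from the Hive sources above, and assumes only hive-storage-api's BloomKFilter on the classpath.
// Illustrative sketch; assumes org.apache.hive:hive-storage-api.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.hive.common.util.BloomKFilter;

public class BloomKFilterRoundTrip {
  public static void main(String[] args) throws IOException {
    // Build side: populate a filter sized for the expected number of entries.
    BloomKFilter build = new BloomKFilter(1000);
    build.addLong(42L);

    // Ship it as bytes, the way the UDF receives it in arguments[1].
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BloomKFilter.serialize(out, build);

    // Probe side: deserialize once, then test candidate values.
    BloomKFilter probe = BloomKFilter.deserialize(new ByteArrayInputStream(out.toByteArray()));
    System.out.println(probe.testLong(42L)); // true
    System.out.println(probe.testLong(7L));  // false with high probability
  }
}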
Use of org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector in project hive by apache.
The class GenericUDFFromUnixTime, method initialize:
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  checkArgsSize(arguments, 1, 2);
  for (int i = 0; i < arguments.length; i++) {
    checkArgPrimitive(arguments, i);
  }
  PrimitiveObjectInspector arg0OI = (PrimitiveObjectInspector) arguments[0];
  switch (arg0OI.getPrimitiveCategory()) {
    case INT:
      inputIntOI = (IntObjectInspector) arguments[0];
      break;
    case LONG:
      inputLongOI = (LongObjectInspector) arguments[0];
      break;
    default:
      throw new UDFArgumentException("The function from_unixtime takes only int/long types for first argument. Got Type:" + arg0OI.getPrimitiveCategory().name());
  }
  if (arguments.length == 2) {
    checkArgGroups(arguments, 1, inputTypes, STRING_GROUP);
    obtainStringConverter(arguments, 1, inputTypes, converters);
  }
  if (timeZone == null) {
    timeZone = SessionState.get() == null ? new HiveConf().getLocalTimeZone() : SessionState.get().getConf().getLocalTimeZone();
    FORMATTER.withZone(timeZone);
  }
  return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
}
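initialize accepts either an int or a bigint first argument by keeping the matching object inspector. A minimal sketch (illustrative class name, not Hive source) of the IntObjectInspector contract these snippets rely on, for both Writable-backed and Java-backed values:
// Illustrative sketch of IntObjectInspector.get for both value shapes.
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;

public class IntInspectorSketch {
  public static void main(String[] args) {
    // Writable-backed inspector: the row object is an IntWritable.
    IntObjectInspector writableOI = PrimitiveObjectInspectorFactory.writableIntObjectInspector;
    int fromWritable = writableOI.get(new IntWritable(42));

    // Java-backed inspector: the row object is a java.lang.Integer.
    IntObjectInspector javaOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    int fromJava = javaOI.get(Integer.valueOf(42));

    System.out.println(fromWritable + " " + fromJava); // 42 42
  }
}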
Use of org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector in project hive by apache.
The class KuduSerDe, method serialize:
/**
 * Serialize an object by navigating inside the Object with the ObjectInspector.
 */
@Override
public KuduWritable serialize(Object obj, ObjectInspector objectInspector) throws SerDeException {
  Preconditions.checkArgument(objectInspector.getCategory() == Category.STRUCT);
  StructObjectInspector soi = (StructObjectInspector) objectInspector;
  List<Object> writableObj = soi.getStructFieldsDataAsList(obj);
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  PartialRow row = schema.newPartialRow();
  for (int i = 0; i < schema.getColumnCount(); i++) {
    StructField field = fields.get(i);
    Object value = writableObj.get(i);
    if (value == null) {
      row.setNull(i);
    } else {
      Type type = schema.getColumnByIndex(i).getType();
      ObjectInspector inspector = field.getFieldObjectInspector();
      switch (type) {
        case BOOL:
          boolean boolVal = ((BooleanObjectInspector) inspector).get(value);
          row.addBoolean(i, boolVal);
          break;
        case INT8:
          byte byteVal = ((ByteObjectInspector) inspector).get(value);
          row.addByte(i, byteVal);
          break;
        case INT16:
          short shortVal = ((ShortObjectInspector) inspector).get(value);
          row.addShort(i, shortVal);
          break;
        case INT32:
          int intVal = ((IntObjectInspector) inspector).get(value);
          row.addInt(i, intVal);
          break;
        case INT64:
          long longVal = ((LongObjectInspector) inspector).get(value);
          row.addLong(i, longVal);
          break;
        case UNIXTIME_MICROS:
          // Calling toSqlTimestamp and using the addTimestamp API ensures we properly
          // convert Hive localDateTime to UTC.
          java.sql.Timestamp timestampVal = ((TimestampObjectInspector) inspector).getPrimitiveJavaObject(value).toSqlTimestamp();
          row.addTimestamp(i, timestampVal);
          break;
        case DECIMAL:
          HiveDecimal decimalVal = ((HiveDecimalObjectInspector) inspector).getPrimitiveJavaObject(value);
          row.addDecimal(i, decimalVal.bigDecimalValue());
          break;
        case FLOAT:
          float floatVal = ((FloatObjectInspector) inspector).get(value);
          row.addFloat(i, floatVal);
          break;
        case DOUBLE:
          double doubleVal = ((DoubleObjectInspector) inspector).get(value);
          row.addDouble(i, doubleVal);
          break;
        case STRING:
          String stringVal = ((StringObjectInspector) inspector).getPrimitiveJavaObject(value);
          row.addString(i, stringVal);
          break;
        case BINARY:
          byte[] bytesVal = ((BinaryObjectInspector) inspector).getPrimitiveJavaObject(value);
          row.addBinary(i, bytesVal);
          break;
        default:
          throw new SerDeException("Unsupported column type: " + type.name());
      }
    }
  }
  return new KuduWritable(row);
}
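serialize never touches the row object directly; it walks the struct through the ObjectInspector hierarchy and reads each primitive via its field inspector. A minimal sketch of that pattern using the standard Java inspectors (illustrative class and field names, not the Kudu-specific code above):
// Illustrative sketch; only Hive's standard Java object inspectors are used.
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;

public class StructWalkSketch {
  public static void main(String[] args) {
    // A struct<id:int,name:string> described by inspectors over plain Java objects.
    StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("id", "name"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    Object row = Arrays.<Object>asList(7, "kudu");

    // Same navigation as serialize above: field refs plus per-field inspectors.
    List<Object> data = soi.getStructFieldsDataAsList(row);
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    int id = ((IntObjectInspector) fields.get(0).getFieldObjectInspector()).get(data.get(0));
    String name = ((StringObjectInspector) fields.get(1).getFieldObjectInspector())
        .getPrimitiveJavaObject(data.get(1));
    System.out.println(id + " " + name); // 7 kudu
  }
}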
Use of org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector in project hive by apache.
The class FixAcidKeyIndex, method validate:
public static AcidKeyIndexValidationResult validate(Configuration conf, Path inputPath) throws IOException {
  AcidKeyIndexValidationResult result = new AcidKeyIndexValidationResult();
  FileSystem fs = inputPath.getFileSystem(conf);
  try (Reader reader = OrcFile.createReader(fs, inputPath);
       RecordReader rr = reader.rows()) {
    List<StripeInformation> stripes = reader.getStripes();
    RecordIdentifier[] keyIndex = OrcRecordUpdater.parseKeyIndex(reader);
    StructObjectInspector soi = (StructObjectInspector) reader.getObjectInspector();
    // struct<operation:int,originalTransaction:bigint,bucket:int,rowId:bigint,currentTransaction:bigint
    List<? extends StructField> structFields = soi.getAllStructFieldRefs();
    StructField transactionField = structFields.get(1);
    LongObjectInspector transactionOI = (LongObjectInspector) transactionField.getFieldObjectInspector();
    StructField bucketField = structFields.get(2);
    IntObjectInspector bucketOI = (IntObjectInspector) bucketField.getFieldObjectInspector();
    StructField rowIdField = structFields.get(3);
    LongObjectInspector rowIdOI = (LongObjectInspector) rowIdField.getFieldObjectInspector();
    long rowsProcessed = 0;
    for (int i = 0; i < stripes.size(); i++) {
      rowsProcessed += stripes.get(i).getNumberOfRows();
      rr.seekToRow(rowsProcessed - 1);
      OrcStruct row = (OrcStruct) rr.next(null);
      long lastTransaction = transactionOI.get(soi.getStructFieldData(row, transactionField));
      int lastBucket = bucketOI.get(soi.getStructFieldData(row, bucketField));
      long lastRowId = rowIdOI.get(soi.getStructFieldData(row, rowIdField));
      RecordIdentifier recordIdentifier = new RecordIdentifier(lastTransaction, lastBucket, lastRowId);
      result.recordIdentifiers.add(recordIdentifier);
      if (stripes.size() != keyIndex.length || keyIndex[i] == null || recordIdentifier.compareTo(keyIndex[i]) != 0) {
        result.isValid = false;
      }
    }
  }
  return result;
}
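The validator reads the (transaction, bucket, rowId) triple from the last row of each stripe through the Long/Int object inspectors and compares it to the corresponding key index entry. A minimal sketch of just that comparison (illustrative values, not Hive source):
// Illustrative sketch; only the RecordIdentifier comparison is shown.
import org.apache.hadoop.hive.ql.io.RecordIdentifier;

public class KeyIndexCompareSketch {
  public static void main(String[] args) {
    // Triple read from the last row of a stripe via the object inspectors.
    RecordIdentifier fromStripe = new RecordIdentifier(5L, 1, 99L);
    // Corresponding entry parsed from the ORC key index.
    RecordIdentifier fromKeyIndex = new RecordIdentifier(5L, 1, 99L);
    // compareTo == 0 means the index entry agrees with the data; any mismatch
    // marks the file as invalid, as in the loop above.
    System.out.println(fromStripe.compareTo(fromKeyIndex) == 0); // true
  }
}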
Use of org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector in project hive by apache.
The class FileSinkOperator, method initializeOp:
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  try {
    this.hconf = hconf;
    filesCreated = false;
    isTemporary = conf.isTemporary();
    multiFileSpray = conf.isMultiFileSpray();
    this.isBucketed = hconf.getInt(hive_metastoreConstants.BUCKET_COUNT, 0) > 0;
    totalFiles = conf.getTotalFiles();
    numFiles = conf.getNumFiles();
    dpCtx = conf.getDynPartCtx();
    lbCtx = conf.getLbCtx();
    fsp = prevFsp = null;
    valToPaths = new HashMap<String, FSPaths>();
    taskId = originalTaskId = Utilities.getTaskId(hconf);
    initializeSpecPath();
    fs = specPath.getFileSystem(hconf);
    if (hconf instanceof JobConf) {
      jc = (JobConf) hconf;
    } else {
      // test code path
      jc = new JobConf(hconf);
    }
    try {
      createHiveOutputFormat(jc);
    } catch (HiveException ex) {
      logOutputFormatError(hconf, ex);
      throw ex;
    }
    isCompressed = conf.getCompressed();
    if (conf.isLinkedFileSink() && conf.isDirectInsert()) {
      parent = Utilities.toTempPath(conf.getFinalDirName());
    } else {
      parent = Utilities.toTempPath(conf.getDirName());
    }
    statsFromRecordWriter = new boolean[numFiles];
    AbstractSerDe serde = conf.getTableInfo().getSerDeClass().newInstance();
    serde.initialize(unsetNestedColumnPaths(hconf), conf.getTableInfo().getProperties(), null);
    serializer = serde;
    outputClass = serializer.getSerializedClass();
    destTablePath = conf.getDestPath();
    isInsertOverwrite = conf.getInsertOverwrite();
    counterGroup = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVECOUNTERGROUP);
    LOG.info("Using serializer : " + serializer + " and formatter : " + hiveOutputFormat + (isCompressed ? " with compression" : ""));
    // Timeout is chosen to make sure that even if one iteration takes more than
    // half of the script.timeout but less than script.timeout, we will still
    // be able to report progress.
    timeOut = hconf.getInt("mapred.healthChecker.script.timeout", 600000) / 2;
    if (multiFileSpray) {
      partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
      int i = 0;
      for (ExprNodeDesc e : conf.getPartitionCols()) {
        partitionEval[i++] = ExprNodeEvaluatorFactory.get(e);
      }
      partitionObjectInspectors = initEvaluators(partitionEval, outputObjInspector);
      prtner = (HivePartitioner<HiveKey, Object>) ReflectionUtils.newInstance(jc.getPartitionerClass(), null);
    }
    if (dpCtx != null && !inspectPartitionValues()) {
      dpSetup();
    }
    if (lbCtx != null) {
      lbSetup();
    }
    if (!bDynParts) {
      fsp = new FSPaths(specPath, conf.isMmTable(), conf.isDirectInsert(), conf.getInsertOverwrite(), conf.getAcidOperation());
      fsp.subdirAfterTxn = combinePathFragments(generateListBucketingDirName(null), unionPath);
      if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
        Utilities.FILE_OP_LOGGER.trace("creating new paths " + System.identityHashCode(fsp) + " from ctor; childSpec " + unionPath + ": tmpPath " + fsp.buildTmpPath() + ", task path " + fsp.buildTaskOutputTempPath());
      }
      // createBucketFiles(fsp);
      if (!this.isSkewedStoredAsSubDirectories) {
        // special entry for non-DP case
        valToPaths.put("", fsp);
      }
    }
    final StoragePolicyValue tmpStorage = StoragePolicyValue.lookup(HiveConf.getVar(hconf, HIVE_TEMPORARY_TABLE_STORAGE));
    if (isTemporary && fsp != null && tmpStorage != StoragePolicyValue.DEFAULT) {
      // Not supported for temp tables.
      assert !conf.isMmTable();
      final Path outputPath = fsp.buildTaskOutputTempPath();
      StoragePolicyShim shim = ShimLoader.getHadoopShims().getStoragePolicyShim(fs);
      if (shim != null) {
        // directory creation is otherwise within the writers
        fs.mkdirs(outputPath);
        shim.setStoragePolicy(outputPath, tmpStorage);
      }
    }
    if (conf.getWriteType() == AcidUtils.Operation.UPDATE || conf.getWriteType() == AcidUtils.Operation.DELETE) {
      // ROW__ID is always in the first field
      recIdField = ((StructObjectInspector) outputObjInspector).getAllStructFieldRefs().get(0);
      recIdInspector = (StructObjectInspector) recIdField.getFieldObjectInspector();
      // bucket is the second field in the record id
      bucketField = recIdInspector.getAllStructFieldRefs().get(1);
      bucketInspector = (IntObjectInspector) bucketField.getFieldObjectInspector();
    }
    numRows = 0;
    cntr = 1;
    logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);
    statsMap.put(getCounterName(Counter.RECORDS_OUT), row_count);
    // Setup hashcode
    hashFunc = conf.getTableInfo().getBucketingVersion() == 2 ? ObjectInspectorUtils::getBucketHashCode : ObjectInspectorUtils::getBucketHashCodeOld;
    // This count is used to get total number of rows in an insert query.
    if (conf.getTableInfo() != null && conf.getTableInfo().getTableName() != null) {
      statsMap.put(TOTAL_TABLE_ROWS_WRITTEN, row_count);
    }
  } catch (HiveException e) {
    throw e;
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
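Near the end, initializeOp selects the bucket hash function by bucketing version using method references. A minimal, standalone sketch of that selection (illustrative class name and version value, not the FileSinkOperator code itself):
// Illustrative sketch of the hashFunc assignment pattern shown above.
import java.util.function.BiFunction;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class BucketHashSketch {
  public static void main(String[] args) {
    int bucketingVersion = 2; // would come from the table's bucketing version
    BiFunction<Object[], ObjectInspector[], Integer> hashFunc =
        bucketingVersion == 2 ? ObjectInspectorUtils::getBucketHashCode
                              : ObjectInspectorUtils::getBucketHashCodeOld;

    Object[] keys = { Integer.valueOf(123) };
    ObjectInspector[] ois = { PrimitiveObjectInspectorFactory.javaIntObjectInspector };
    System.out.println(hashFunc.apply(keys, ois));
  }
}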