Search in sources :

Example 1 with StoragePolicyValue

Use of org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyValue in the Apache Hive project.

The method initializeOp of the class FileSinkOperator.

/**
 * Initializes this operator's output state from the operator descriptor ({@code conf})
 * and the supplied Hadoop configuration.
 *
 * <p>In order, this method: caches descriptor settings in fields, resolves the output
 * file system from {@code specPath}, creates the output format, instantiates and
 * initializes the serializer, derives the progress-report timeout, sets up the
 * partitioner when multi-file spray is enabled, runs dynamic-partition and
 * list-bucketing setup when their contexts are present, optionally applies a storage
 * policy to the temporary-table output directory, resolves the record-id/bucket fields
 * for UPDATE and DELETE writes, and finally resets the row counters.
 *
 * @param hconf configuration used to resolve the file system, the task id and the
 *              Hive settings read below; stored in {@code this.hconf}
 * @throws HiveException if any step fails; a {@code HiveException} raised inside is
 *                       rethrown unchanged, any other exception is logged and wrapped
 */
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    try {
        this.hconf = hconf;
        filesCreated = false;
        isNativeTable = !conf.getTableInfo().isNonNative();
        isTemporary = conf.isTemporary();
        multiFileSpray = conf.isMultiFileSpray();
        totalFiles = conf.getTotalFiles();
        numFiles = conf.getNumFiles();
        dpCtx = conf.getDynPartCtx();
        lbCtx = conf.getLbCtx();
        fsp = prevFsp = null;
        valToPaths = new HashMap<String, FSPaths>();
        taskId = Utilities.getTaskId(hconf);
        initializeSpecPath();
        fs = specPath.getFileSystem(hconf);
        try {
            createHiveOutputFormat(hconf);
        } catch (HiveException ex) {
            // Record the output-format failure before propagating it unchanged.
            logOutputFormatError(hconf, ex);
            throw ex;
        }
        isCompressed = conf.getCompressed();
        parent = Utilities.toTempPath(conf.getDirName());
        statsFromRecordWriter = new boolean[numFiles];
        // The table's configured (de)serializer class is instantiated reflectively and
        // used here as the Serializer for outgoing rows.
        serializer = (Serializer) conf.getTableInfo().getDeserializerClass().newInstance();
        serializer.initialize(unsetNestedColumnPaths(hconf), conf.getTableInfo().getProperties());
        outputClass = serializer.getSerializedClass();
        if (isLogInfoEnabled) {
            LOG.info("Using serializer : " + serializer + " and formatter : " + hiveOutputFormat + (isCompressed ? " with compression" : ""));
        }
        // Timeout is chosen to make sure that even if one iteration takes more than
        // half of the script.timeout but less than script.timeout, we will still
        // be able to report progress.
        timeOut = hconf.getInt("mapred.healthChecker.script.timeout", 600000) / 2;
        if (hconf instanceof JobConf) {
            jc = (JobConf) hconf;
        } else {
            // test code path
            jc = new JobConf(hconf);
        }
        if (multiFileSpray) {
            // One evaluator per partition column, in the descriptor's column order.
            partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
            int i = 0;
            for (ExprNodeDesc e : conf.getPartitionCols()) {
                partitionEval[i++] = ExprNodeEvaluatorFactory.get(e);
            }
            partitionObjectInspectors = initEvaluators(partitionEval, outputObjInspector);
            prtner = (HivePartitioner<HiveKey, Object>) ReflectionUtils.newInstance(jc.getPartitionerClass(), null);
        }
        if (dpCtx != null) {
            dpSetup();
        }
        if (lbCtx != null) {
            lbSetup();
        }
        // NOTE(review): bDynParts and isSkewedStoredAsSubDirectories are presumably
        // assigned by dpSetup()/lbSetup() above - confirm; this check must stay after them.
        if (!bDynParts) {
            fsp = new FSPaths(specPath);
            // createBucketFiles(fsp);
            if (!this.isSkewedStoredAsSubDirectories) {
                // special entry for non-DP case
                valToPaths.put("", fsp);
            }
        }
        final StoragePolicyValue tmpStorage = StoragePolicyValue.lookup(HiveConf.getVar(hconf, HIVE_TEMPORARY_TABLE_STORAGE));
        // Only apply a storage policy for temporary output with a single known output
        // path (fsp is null when the !bDynParts branch above was skipped) and a
        // non-default configured policy.
        if (isTemporary && fsp != null && tmpStorage != StoragePolicyValue.DEFAULT) {
            final Path outputPath = fsp.taskOutputTempPath;
            StoragePolicyShim shim = ShimLoader.getHadoopShims().getStoragePolicyShim(fs);
            if (shim != null) {
                // directory creation is otherwise within the writers
                fs.mkdirs(outputPath);
                shim.setStoragePolicy(outputPath, tmpStorage);
            }
        }
        if (conf.getWriteType() == AcidUtils.Operation.UPDATE || conf.getWriteType() == AcidUtils.Operation.DELETE) {
            // ROW__ID is always in the first field
            recIdField = ((StructObjectInspector) outputObjInspector).getAllStructFieldRefs().get(0);
            recIdInspector = (StructObjectInspector) recIdField.getFieldObjectInspector();
            // bucket is the second field in the record id
            bucketField = recIdInspector.getAllStructFieldRefs().get(1);
            bucketInspector = (IntObjectInspector) bucketField.getFieldObjectInspector();
        }
        numRows = 0;
        cntr = 1;
        logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);
        statsMap.put(getCounterName(Counter.RECORDS_OUT), row_count);
    } catch (HiveException e) {
        throw e;
    } catch (Exception e) {
        // Route the failure through the operator's logger rather than stderr; the
        // original exception is preserved as the cause of the HiveException.
        LOG.error("Failed to initialize FileSinkOperator", e);
        throw new HiveException(e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) StoragePolicyValue(org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyValue) HiveFatalException(org.apache.hadoop.hive.ql.metadata.HiveFatalException) FileNotFoundException(java.io.FileNotFoundException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveKey(org.apache.hadoop.hive.ql.io.HiveKey) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) JobConf(org.apache.hadoop.mapred.JobConf) StoragePolicyShim(org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyShim) SubStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SubStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Aggregations

FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 Path (org.apache.hadoop.fs.Path)1 HiveKey (org.apache.hadoop.hive.ql.io.HiveKey)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 HiveFatalException (org.apache.hadoop.hive.ql.metadata.HiveFatalException)1 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)1 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)1 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)1 SubStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SubStructObjectInspector)1 StoragePolicyShim (org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyShim)1 StoragePolicyValue (org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyValue)1 JobConf (org.apache.hadoop.mapred.JobConf)1