Example 26 with HiveException

Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.

From the class FileSinkOperator, method createBucketFiles:

protected void createBucketFiles(FSPaths fsp) throws HiveException {
    try {
        int filesIdx = 0;
        Set<Integer> seenBuckets = new HashSet<Integer>();
        for (int idx = 0; idx < totalFiles; idx++) {
            if (this.getExecContext() != null && this.getExecContext().getFileId() != null) {
                if (isInfoEnabled) {
                    LOG.info("replace taskId from execContext ");
                }
                taskId = Utilities.replaceTaskIdFromFilename(taskId, this.getExecContext().getFileId());
                if (isInfoEnabled) {
                    LOG.info("new taskId: FS " + taskId);
                }
                assert !multiFileSpray;
                assert totalFiles == 1;
            }
            int bucketNum = 0;
            if (multiFileSpray) {
                key.setHashCode(idx);
                // does this hashcode belong to this reducer?
                int numReducers = totalFiles / numFiles;
                if (numReducers > 1) {
                    int currReducer = Integer.parseInt(Utilities.getTaskIdFromFilename(Utilities.getTaskId(hconf)));
                    int reducerIdx = prtner.getPartition(key, null, numReducers);
                    if (currReducer != reducerIdx) {
                        continue;
                    }
                }
                bucketNum = prtner.getBucket(key, null, totalFiles);
                if (seenBuckets.contains(bucketNum)) {
                    continue;
                }
                seenBuckets.add(bucketNum);
                bucketMap.put(bucketNum, filesIdx);
                taskId = Utilities.replaceTaskIdFromFilename(Utilities.getTaskId(hconf), bucketNum);
            }
            createBucketForFileIdx(fsp, filesIdx);
            filesIdx++;
        }
        assert filesIdx == numFiles;
        // in recent hadoop versions, use deleteOnExit to clean tmp files.
        if (isNativeTable && fs != null && fsp != null) {
            autoDelete = fs.deleteOnExit(fsp.outPaths[0]);
        }
    } catch (Exception e) {
        e.printStackTrace();
        throw new HiveException(e);
    }
    filesCreated = true;
}
Also used: HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), HiveFatalException (org.apache.hadoop.hive.ql.metadata.HiveFatalException), FileNotFoundException (java.io.FileNotFoundException), IOException (java.io.IOException), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), HashSet (java.util.HashSet)
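
The pattern worth noticing here is the blanket catch-and-wrap: any checked exception raised while creating the bucket files (filesystem calls, task-id parsing, and so on) is rethrown as a HiveException, so the operator pipeline only ever has to deal with one exception type. A minimal sketch of the same idiom, using a hypothetical prepareOutput helper and argument names of my own choosing, might look like this:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class BucketFileSketch {

    /**
     * Hypothetical helper mirroring the createBucketFiles() idiom: do the
     * filesystem work inside a try block and wrap any checked exception in a
     * HiveException so callers only handle Hive's own exception type.
     */
    public static boolean prepareOutput(Configuration conf, Path outPath) throws HiveException {
        try {
            FileSystem fs = outPath.getFileSystem(conf);
            // make sure the temporary output directory exists
            fs.mkdirs(outPath.getParent());
            // ask Hadoop to clean the temp file up on JVM exit, as the
            // operator does for fsp.outPaths[0] above
            return fs.deleteOnExit(outPath);
        } catch (IOException e) {
            // HiveException(Throwable) keeps the original failure as the cause
            throw new HiveException(e);
        }
    }
}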

Example 27 with HiveException

Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.

From the class FileSinkOperator, method process:

@Override
public void process(Object row, int tag) throws HiveException {
    runTimeNumRows++;
    /* Create list bucketing sub-directory only if stored-as-directories is on. */
    String lbDirName = null;
    lbDirName = (lbCtx == null) ? null : generateListBucketingDirName(row);
    if (!bDynParts && !filesCreated) {
        if (lbDirName != null) {
            FSPaths fsp2 = lookupListBucketingPaths(lbDirName);
        } else {
            createBucketFiles(fsp);
        }
    }
    try {
        updateProgress();
        // if DP is enabled, get the final output writers and prepare the real output row
        assert inputObjInspectors[0].getCategory() == ObjectInspector.Category.STRUCT : "input object inspector is not struct";
        if (bDynParts) {
            // we need to read bucket number which is the last column in value (after partition columns)
            if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
                numDynParts += 1;
            }
            // copy the DP column values from the input row to dpVals
            dpVals.clear();
            dpWritables.clear();
            ObjectInspectorUtils.partialCopyToStandardObject(dpWritables, row, dpStartCol, numDynParts, (StructObjectInspector) inputObjInspectors[0], ObjectInspectorCopyOption.WRITABLE);
            // pass the null value along to the escaping process to determine what the dir should be
            for (Object o : dpWritables) {
                if (o == null || o.toString().length() == 0) {
                    dpVals.add(dpCtx.getDefaultPartitionName());
                } else {
                    dpVals.add(o.toString());
                }
            }
            String invalidPartitionVal;
            if ((invalidPartitionVal = HiveStringUtils.getPartitionValWithInvalidCharacter(dpVals, dpCtx.getWhiteListPattern())) != null) {
                throw new HiveFatalException("Partition value '" + invalidPartitionVal + "' contains a character not matched by whitelist pattern '" + dpCtx.getWhiteListPattern().toString() + "'.  " + "(configure with " + HiveConf.ConfVars.METASTORE_PARTITION_NAME_WHITELIST_PATTERN.varname + ")");
            }
            fpaths = getDynOutPaths(dpVals, lbDirName);
            // use SubStructObjectInspector to serialize the non-partitioning columns in the input row
            recordValue = serializer.serialize(row, subSetOI);
        } else {
            if (lbDirName != null) {
                fpaths = lookupListBucketingPaths(lbDirName);
            } else {
                fpaths = fsp;
            }
            recordValue = serializer.serialize(row, inputObjInspectors[0]);
            // recordValue can be null when the SerDe buffers rows internally (the
            // buffer size is kept track of in the SerDe); skip the write in that case
            if (recordValue == null) {
                return;
            }
        }
        rowOutWriters = fpaths.outWriters;
        // check if all record writers implement the statistics interface; if at least one
        // record writer doesn't, we fall back to the conventional way of gathering stats
        isCollectRWStats = areAllTrue(statsFromRecordWriter);
        if (conf.isGatherStats() && !isCollectRWStats) {
            SerDeStats stats = serializer.getSerDeStats();
            if (stats != null) {
                fpaths.stat.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize());
            }
            fpaths.stat.addToStat(StatsSetupConst.ROW_COUNT, 1);
        }
        if ((++numRows == cntr) && isLogInfoEnabled) {
            cntr = logEveryNRows == 0 ? cntr * 10 : numRows + logEveryNRows;
            if (cntr < 0 || numRows < 0) {
                cntr = 0;
                numRows = 1;
            }
            LOG.info(toString() + ": records written - " + numRows);
        }
        // This should always be 0 for the final result file
        int writerOffset = findWriterOffset(row);
        // the ACID record updaters expect the actual row, not the serialized recordValue,
        // so the ACID branches below pass the row.
        if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID) {
            rowOutWriters[writerOffset].write(recordValue);
        } else if (conf.getWriteType() == AcidUtils.Operation.INSERT) {
            fpaths.updaters[writerOffset].insert(conf.getTransactionId(), row);
        } else {
            // TODO I suspect we could skip much of the stuff above this in the function in the case
            // of update and delete.  But I don't understand all of the side effects of the above
            // code and don't want to skip over it yet.
            // Find the bucket id, and switch buckets if need to
            ObjectInspector rowInspector = bDynParts ? subSetOI : outputObjInspector;
            Object recId = ((StructObjectInspector) rowInspector).getStructFieldData(row, recIdField);
            int bucketNum = bucketInspector.get(recIdInspector.getStructFieldData(recId, bucketField));
            if (fpaths.acidLastBucket != bucketNum) {
                fpaths.acidLastBucket = bucketNum;
                // Switch files
                fpaths.updaters[conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED) ? 0 : ++fpaths.acidFileOffset] = HiveFileFormatUtils.getAcidRecordUpdater(jc, conf.getTableInfo(), bucketNum, conf, fpaths.outPaths[conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED) ? 0 : fpaths.acidFileOffset], rowInspector, reporter, 0);
                if (isDebugEnabled) {
                    LOG.debug("Created updater for bucket number " + bucketNum + " using file " + fpaths.outPaths[conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED) ? 0 : fpaths.acidFileOffset]);
                }
            }
            if (conf.getWriteType() == AcidUtils.Operation.UPDATE) {
                fpaths.updaters[conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED) ? 0 : fpaths.acidFileOffset].update(conf.getTransactionId(), row);
            } else if (conf.getWriteType() == AcidUtils.Operation.DELETE) {
                fpaths.updaters[conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED) ? 0 : fpaths.acidFileOffset].delete(conf.getTransactionId(), row);
            } else {
                throw new HiveException("Unknown write type " + conf.getWriteType().toString());
            }
        }
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (SerDeException e) {
        throw new HiveException(e);
    }
}
Also used: SerDeStats (org.apache.hadoop.hive.serde2.SerDeStats), SubStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SubStructObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), HiveFatalException (org.apache.hadoop.hive.ql.metadata.HiveFatalException), IOException (java.io.IOException), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
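
Two small, reusable patterns sit inside process(). First, both IOException and SerDeException are converted to HiveException at the operator boundary, just as in the previous example. Second, the numRows/cntr bookkeeping logs the record count at an exponentially growing interval (1, 10, 100, ...) unless the logEveryNRows setting read in initializeOp (Example 30) pins a fixed step. A stand-alone sketch of that logging cadence, with class and field names of my own choosing, could look like this:

/**
 * Sketch of the escalating "records written" log cadence used in process():
 * log at 1, 10, 100, ... rows, or every N rows once a fixed interval is set.
 * The class and method names here are illustrative, not taken from Hive.
 */
public class RowCountLogger {

    private long numRows = 0;
    private long cntr = 1;              // next row count at which to log
    private final long logEveryNRows;   // 0 means "multiply the threshold by 10 each time"

    public RowCountLogger(long logEveryNRows) {
        this.logEveryNRows = logEveryNRows;
    }

    /** Call once per processed row; returns true when a log line should be emitted. */
    public boolean recordRow() {
        if (++numRows != cntr) {
            return false;
        }
        cntr = (logEveryNRows == 0) ? cntr * 10 : numRows + logEveryNRows;
        if (cntr < 0 || numRows < 0) {
            // guard against overflow, exactly as the operator does
            cntr = 0;
            numRows = 1;
        }
        return true;
    }

    public long getNumRows() {
        return numRows;
    }
}

A caller would simply do if (logger.recordRow()) { LOG.info("records written - " + logger.getNumRows()); } per row, which keeps the hot path to a single comparison.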

Example 28 with HiveException

Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.

From the class FileSinkOperator, method jobCloseOp:

@Override
public void jobCloseOp(Configuration hconf, boolean success) throws HiveException {
    try {
        if ((conf != null) && isNativeTable) {
            Path specPath = conf.getDirName();
            DynamicPartitionCtx dpCtx = conf.getDynPartCtx();
            if (conf.isLinkedFileSink() && (dpCtx != null)) {
                specPath = conf.getParentDir();
            }
            Utilities.mvFileToFinalPath(specPath, hconf, success, LOG, dpCtx, conf, reporter);
        }
    } catch (IOException e) {
        throw new HiveException(e);
    }
    super.jobCloseOp(hconf, success);
}
Also used: Path (org.apache.hadoop.fs.Path), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), DynamicPartitionCtx (org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx), IOException (java.io.IOException)
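
jobCloseOp() delegates the real work to Utilities.mvFileToFinalPath and only converts a possible IOException into a HiveException. A reduced sketch of that shape, using a hypothetical moveToFinal helper instead of the Hive utility (which does considerably more, for example handling duplicate task outputs), might be:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class JobCloseSketch {

    /**
     * Hypothetical stand-in for "move the staging output to its final location,
     * but surface failures as HiveException", the shape of jobCloseOp() above.
     */
    public static void moveToFinal(Configuration conf, Path tmpDir, Path finalDir, boolean success)
            throws HiveException {
        try {
            FileSystem fs = tmpDir.getFileSystem(conf);
            if (success) {
                // promote the job output to its final location
                if (!fs.rename(tmpDir, finalDir)) {
                    throw new IOException("Unable to rename " + tmpDir + " to " + finalDir);
                }
            } else {
                // failed job: discard the staging directory
                fs.delete(tmpDir, true);
            }
        } catch (IOException e) {
            throw new HiveException(e);
        }
    }
}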

Example 29 with HiveException

Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.

From the class FileSinkOperator, method publishStats:

private void publishStats() throws HiveException {
    boolean isStatsReliable = conf.isStatsReliable();
    // Initializing a stats publisher
    StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);
    if (statsPublisher == null) {
        // just return, stats gathering should not block the main query
        LOG.error("StatsPublishing error: StatsPublisher is not initialized.");
        if (isStatsReliable) {
            throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg());
        }
        return;
    }
    StatsCollectionContext sContext = new StatsCollectionContext(hconf);
    sContext.setStatsTmpDir(conf.getStatsTmpDir());
    if (!statsPublisher.connect(sContext)) {
        // just return, stats gathering should not block the main query
        LOG.error("StatsPublishing error: cannot connect to database");
        if (isStatsReliable) {
            throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
        }
        return;
    }
    String spSpec = conf.getStaticSpec();
    for (Map.Entry<String, FSPaths> entry : valToPaths.entrySet()) {
        // DP/LB
        String fspKey = entry.getKey();
        FSPaths fspValue = entry.getValue();
        // the sort-dynamic-partition optimization adds the taskId to the fspKey.
        if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
            String taskID = Utilities.getTaskIdFromFilename(fspKey);
            // if the length of (prefix/ds=__HIVE_DEFAULT_PARTITION__/000000_0) is greater than the max key prefix
            // while (prefix/ds=10/000000_0) is less than the max key prefix, then the former will get hashed
            // to a smaller prefix (MD5hash/000000_0) and the latter will be stored as-is in the staging stats table.
            // When stats get aggregated in StatsTask, only the keys that start with "prefix" will be fetched.
            // Now that (prefix/ds=__HIVE_DEFAULT_PARTITION__) is hashed to a smaller prefix it will
            // not be retrieved from the staging table and hence not aggregated. To avoid this issue
            // we remove the taskId from the key, which is redundant anyway.
            fspKey = fspKey.split(taskID)[0];
        }
        // split[0] = DP, split[1] = LB
        String[] split = splitKey(fspKey);
        String dpSpec = split[0];
        // key = "database.table/SP/DP/"LB/
        // Hive stores lowercase table names in the metastore, and Counters are case sensitive, so we
        // use the lowercase table name as the prefix here, as StatsTask gets the table name from the metastore to fetch the counter.
        String prefix = conf.getTableInfo().getTableName().toLowerCase();
        prefix = Utilities.join(prefix, spSpec, dpSpec);
        prefix = prefix.endsWith(Path.SEPARATOR) ? prefix : prefix + Path.SEPARATOR;
        Map<String, String> statsToPublish = new HashMap<String, String>();
        for (String statType : fspValue.stat.getStoredStats()) {
            statsToPublish.put(statType, Long.toString(fspValue.stat.getStat(statType)));
        }
        if (!statsPublisher.publishStat(prefix, statsToPublish)) {
            // Not changing the interface to maintain backward compatibility
            if (isStatsReliable) {
                throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
            }
        }
    }
    sContext.setIndexForTezUnion(this.getIndexForTezUnion());
    if (!statsPublisher.closeConnection(sContext)) {
        // Not changing the interface to maintain backward compatibility
        if (isStatsReliable) {
            throw new HiveException(ErrorMsg.STATSPUBLISHER_CLOSING_ERROR.getErrorCodedMsg());
        }
    }
}
Also used: StatsPublisher (org.apache.hadoop.hive.ql.stats.StatsPublisher), StatsCollectionContext (org.apache.hadoop.hive.ql.stats.StatsCollectionContext), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), HashMap (java.util.HashMap), Map (java.util.Map)
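
The recurring decision in publishStats() is that a stats failure only aborts the query when stats are configured as reliable (hive.stats.reliable); otherwise it is logged and the method simply returns, so stats gathering never blocks the main query. A compact, hypothetical helper expressing that policy (the class and method names are mine; the ErrorMsg codes are the ones used above) could be:

import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class StatsFailurePolicy {

    private static final Logger LOG = LoggerFactory.getLogger(StatsFailurePolicy.class);

    /**
     * Hypothetical helper: log a stats-publishing failure and escalate it to a
     * HiveException only when stats are reliable, mirroring the checks around
     * connect(), publishStat() and closeConnection() above.
     */
    public static void handleStatsFailure(boolean isStatsReliable, ErrorMsg error, String detail)
            throws HiveException {
        LOG.error("StatsPublishing error: {}", detail);
        if (isStatsReliable) {
            // e.g. ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR, as in the code above
            throw new HiveException(error.getErrorCodedMsg());
        }
        // not reliable: stats gathering must not block the main query, so return normally
    }
}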

Example 30 with HiveException

Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.

From the class FileSinkOperator, method initializeOp:

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    try {
        this.hconf = hconf;
        filesCreated = false;
        isNativeTable = !conf.getTableInfo().isNonNative();
        isTemporary = conf.isTemporary();
        multiFileSpray = conf.isMultiFileSpray();
        totalFiles = conf.getTotalFiles();
        numFiles = conf.getNumFiles();
        dpCtx = conf.getDynPartCtx();
        lbCtx = conf.getLbCtx();
        fsp = prevFsp = null;
        valToPaths = new HashMap<String, FSPaths>();
        taskId = Utilities.getTaskId(hconf);
        initializeSpecPath();
        fs = specPath.getFileSystem(hconf);
        try {
            createHiveOutputFormat(hconf);
        } catch (HiveException ex) {
            logOutputFormatError(hconf, ex);
            throw ex;
        }
        isCompressed = conf.getCompressed();
        parent = Utilities.toTempPath(conf.getDirName());
        statsFromRecordWriter = new boolean[numFiles];
        serializer = (Serializer) conf.getTableInfo().getDeserializerClass().newInstance();
        serializer.initialize(unsetNestedColumnPaths(hconf), conf.getTableInfo().getProperties());
        outputClass = serializer.getSerializedClass();
        if (isLogInfoEnabled) {
            LOG.info("Using serializer : " + serializer + " and formatter : " + hiveOutputFormat + (isCompressed ? " with compression" : ""));
        }
        // Timeout is chosen to make sure that even if one iteration takes more than
        // half of the script.timeout but less than script.timeout, we will still
        // be able to report progress.
        timeOut = hconf.getInt("mapred.healthChecker.script.timeout", 600000) / 2;
        if (hconf instanceof JobConf) {
            jc = (JobConf) hconf;
        } else {
            // test code path
            jc = new JobConf(hconf);
        }
        if (multiFileSpray) {
            partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
            int i = 0;
            for (ExprNodeDesc e : conf.getPartitionCols()) {
                partitionEval[i++] = ExprNodeEvaluatorFactory.get(e);
            }
            partitionObjectInspectors = initEvaluators(partitionEval, outputObjInspector);
            prtner = (HivePartitioner<HiveKey, Object>) ReflectionUtils.newInstance(jc.getPartitionerClass(), null);
        }
        if (dpCtx != null) {
            dpSetup();
        }
        if (lbCtx != null) {
            lbSetup();
        }
        if (!bDynParts) {
            fsp = new FSPaths(specPath);
            // createBucketFiles(fsp);
            if (!this.isSkewedStoredAsSubDirectories) {
                // special entry for non-DP case
                valToPaths.put("", fsp);
            }
        }
        final StoragePolicyValue tmpStorage = StoragePolicyValue.lookup(HiveConf.getVar(hconf, HIVE_TEMPORARY_TABLE_STORAGE));
        if (isTemporary && fsp != null && tmpStorage != StoragePolicyValue.DEFAULT) {
            final Path outputPath = fsp.taskOutputTempPath;
            StoragePolicyShim shim = ShimLoader.getHadoopShims().getStoragePolicyShim(fs);
            if (shim != null) {
                // directory creation is otherwise within the writers
                fs.mkdirs(outputPath);
                shim.setStoragePolicy(outputPath, tmpStorage);
            }
        }
        if (conf.getWriteType() == AcidUtils.Operation.UPDATE || conf.getWriteType() == AcidUtils.Operation.DELETE) {
            // ROW__ID is always in the first field
            recIdField = ((StructObjectInspector) outputObjInspector).getAllStructFieldRefs().get(0);
            recIdInspector = (StructObjectInspector) recIdField.getFieldObjectInspector();
            // bucket is the second field in the record id
            bucketField = recIdInspector.getAllStructFieldRefs().get(1);
            bucketInspector = (IntObjectInspector) bucketField.getFieldObjectInspector();
        }
        numRows = 0;
        cntr = 1;
        logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);
        statsMap.put(getCounterName(Counter.RECORDS_OUT), row_count);
    } catch (HiveException e) {
        throw e;
    } catch (Exception e) {
        e.printStackTrace();
        throw new HiveException(e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), StoragePolicyValue (org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyValue), HiveFatalException (org.apache.hadoop.hive.ql.metadata.HiveFatalException), FileNotFoundException (java.io.FileNotFoundException), IOException (java.io.IOException), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), HiveKey (org.apache.hadoop.hive.ql.io.HiveKey), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), JobConf (org.apache.hadoop.mapred.JobConf), StoragePolicyShim (org.apache.hadoop.hive.shims.HadoopShims.StoragePolicyShim), SubStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SubStructObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
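
The closing catch blocks of initializeOp() show a third variation on the theme: a HiveException raised by a nested step (here createHiveOutputFormat) is rethrown untouched so its message survives, while every other exception is wrapped. A generic, hypothetical version of that pattern, under the assumption that the step can be expressed as a Callable:

import java.util.concurrent.Callable;

import org.apache.hadoop.hive.ql.metadata.HiveException;

public class InitSketch {

    /**
     * Hypothetical wrapper mirroring the catch blocks at the end of
     * initializeOp(): let HiveException pass through unchanged, wrap
     * everything else so callers see a single exception type.
     */
    public static <T> T runAsHive(Callable<T> step) throws HiveException {
        try {
            return step.call();
        } catch (HiveException e) {
            // already the right type: preserve it as-is
            throw e;
        } catch (Exception e) {
            // any other checked or unchecked failure is wrapped with its cause attached
            throw new HiveException(e);
        }
    }
}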

Aggregations

HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 364
IOException (java.io.IOException): 144
ArrayList (java.util.ArrayList): 64
Table (org.apache.hadoop.hive.ql.metadata.Table): 60
Path (org.apache.hadoop.fs.Path): 55
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 42
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 41
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 36
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 35
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 35
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 31
FileNotFoundException (java.io.FileNotFoundException): 26
FileSystem (org.apache.hadoop.fs.FileSystem): 26
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException): 26
URISyntaxException (java.net.URISyntaxException): 25
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 25
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 24
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 23
HashMap (java.util.HashMap): 21
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 21