
Example 1 with HiveFatalException

Use of org.apache.hadoop.hive.ql.metadata.HiveFatalException in project hive by apache.

From the class FileSinkOperator, method process:

@Override
public void process(Object row, int tag) throws HiveException {
    runTimeNumRows++;
    /* Create list bucketing sub-directory only if stored-as-directories is on. */
    String lbDirName = null;
    lbDirName = (lbCtx == null) ? null : generateListBucketingDirName(row);
    if (!bDynParts && !filesCreated) {
        if (lbDirName != null) {
            FSPaths fsp2 = lookupListBucketingPaths(lbDirName);
        } else {
            createBucketFiles(fsp);
        }
    }
    try {
        updateProgress();
        // if DP is enabled, get the final output writers and prepare the real output row
        assert inputObjInspectors[0].getCategory() == ObjectInspector.Category.STRUCT : "input object inspector is not struct";
        if (bDynParts) {
            // we need to read bucket number which is the last column in value (after partition columns)
            if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
                numDynParts += 1;
            }
            // copy the DP column values from the input row to dpVals
            dpVals.clear();
            dpWritables.clear();
            ObjectInspectorUtils.partialCopyToStandardObject(dpWritables, row, dpStartCol, numDynParts, (StructObjectInspector) inputObjInspectors[0], ObjectInspectorCopyOption.WRITABLE);
            // pass the null value along to the escaping process to determine what the dir should be
            for (Object o : dpWritables) {
                if (o == null || o.toString().length() == 0) {
                    dpVals.add(dpCtx.getDefaultPartitionName());
                } else {
                    dpVals.add(o.toString());
                }
            }
            String invalidPartitionVal;
            if ((invalidPartitionVal = HiveStringUtils.getPartitionValWithInvalidCharacter(dpVals, dpCtx.getWhiteListPattern())) != null) {
                throw new HiveFatalException("Partition value '" + invalidPartitionVal + "' contains a character not matched by whitelist pattern '" + dpCtx.getWhiteListPattern().toString() + "'.  " + "(configure with " + HiveConf.ConfVars.METASTORE_PARTITION_NAME_WHITELIST_PATTERN.varname + ")");
            }
            fpaths = getDynOutPaths(dpVals, lbDirName);
            // use SubStructObjectInspector to serialize the non-partitioning columns in the input row
            recordValue = serializer.serialize(row, subSetOI);
        } else {
            if (lbDirName != null) {
                fpaths = lookupListBucketingPaths(lbDirName);
            } else {
                fpaths = fsp;
            }
            recordValue = serializer.serialize(row, inputObjInspectors[0]);
            // some serializers buffer rows and return null until the batch is full
            // (the size of the buffer is kept track of in the SerDe)
            if (recordValue == null) {
                return;
            }
        }
        rowOutWriters = fpaths.outWriters;
        // check if all record writers implement statistics. if at least one RW
        // doesn't implement the stats interface we will fall back to the conventional way
        // of gathering stats
        isCollectRWStats = areAllTrue(statsFromRecordWriter);
        if (conf.isGatherStats() && !isCollectRWStats) {
            SerDeStats stats = serializer.getSerDeStats();
            if (stats != null) {
                fpaths.stat.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize());
            }
            fpaths.stat.addToStat(StatsSetupConst.ROW_COUNT, 1);
        }
        if ((++numRows == cntr) && isLogInfoEnabled) {
            cntr = logEveryNRows == 0 ? cntr * 10 : numRows + logEveryNRows;
            if (cntr < 0 || numRows < 0) {
                cntr = 0;
                numRows = 1;
            }
            LOG.info(toString() + ": records written - " + numRows);
        }
        // This should always be 0 for the final result file
        int writerOffset = findWriterOffset(row);
        // pass the row rather than recordValue.
        if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID) {
            rowOutWriters[writerOffset].write(recordValue);
        } else if (conf.getWriteType() == AcidUtils.Operation.INSERT) {
            fpaths.updaters[writerOffset].insert(conf.getTransactionId(), row);
        } else {
            // TODO I suspect we could skip much of the stuff above this in the function in the case
            // of update and delete.  But I don't understand all of the side effects of the above
            // code and don't want to skip over it yet.
            // Find the bucket id, and switch buckets if need to
            ObjectInspector rowInspector = bDynParts ? subSetOI : outputObjInspector;
            Object recId = ((StructObjectInspector) rowInspector).getStructFieldData(row, recIdField);
            int bucketNum = bucketInspector.get(recIdInspector.getStructFieldData(recId, bucketField));
            if (fpaths.acidLastBucket != bucketNum) {
                fpaths.acidLastBucket = bucketNum;
                // Switch files
                fpaths.updaters[conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED) ? 0 : ++fpaths.acidFileOffset] = HiveFileFormatUtils.getAcidRecordUpdater(jc, conf.getTableInfo(), bucketNum, conf, fpaths.outPaths[conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED) ? 0 : fpaths.acidFileOffset], rowInspector, reporter, 0);
                if (isDebugEnabled) {
                    LOG.debug("Created updater for bucket number " + bucketNum + " using file " + fpaths.outPaths[conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED) ? 0 : fpaths.acidFileOffset]);
                }
            }
            if (conf.getWriteType() == AcidUtils.Operation.UPDATE) {
                fpaths.updaters[conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED) ? 0 : fpaths.acidFileOffset].update(conf.getTransactionId(), row);
            } else if (conf.getWriteType() == AcidUtils.Operation.DELETE) {
                fpaths.updaters[conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED) ? 0 : fpaths.acidFileOffset].delete(conf.getTransactionId(), row);
            } else {
                throw new HiveException("Unknown write type " + conf.getWriteType().toString());
            }
        }
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (SerDeException e) {
        throw new HiveException(e);
    }
}
Also used: SerDeStats (org.apache.hadoop.hive.serde2.SerDeStats), SubStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SubStructObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), HiveFatalException (org.apache.hadoop.hive.ql.metadata.HiveFatalException), IOException (java.io.IOException), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
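
The fatal path in this method is the whitelist check on the dynamic partition values: an offending value aborts the write with a HiveFatalException rather than a retryable HiveException. Below is a minimal, standalone sketch of that validation pattern; the class name PartitionValueWhitelist, the sample pattern, and the use of a plain RuntimeException in place of HiveFatalException are illustrative assumptions, not Hive API.

import java.util.List;
import java.util.regex.Pattern;

// Hypothetical helper mirroring the whitelist check in process(): every dynamic
// partition value must match the configured pattern, otherwise the write fails
// with a non-retryable error (HiveFatalException in FileSinkOperator).
public class PartitionValueWhitelist {

    private final Pattern whiteListPattern;

    public PartitionValueWhitelist(Pattern whiteListPattern) {
        this.whiteListPattern = whiteListPattern;
    }

    // Returns the first value that does not match the whitelist, or null if all match.
    public String firstInvalidValue(List<String> partitionValues) {
        for (String val : partitionValues) {
            if (!whiteListPattern.matcher(val).matches()) {
                return val;
            }
        }
        return null;
    }

    public void validateOrFail(List<String> partitionValues) {
        String invalid = firstInvalidValue(partitionValues);
        if (invalid != null) {
            // Stand-in for HiveFatalException; the real code also names the
            // METASTORE_PARTITION_NAME_WHITELIST_PATTERN setting in the message.
            throw new RuntimeException("Partition value '" + invalid
                + "' contains a character not matched by whitelist pattern '"
                + whiteListPattern + "'");
        }
    }

    public static void main(String[] args) {
        PartitionValueWhitelist whitelist =
            new PartitionValueWhitelist(Pattern.compile("[A-Za-z0-9_\\-]*"));
        whitelist.validateOrFail(List.of("2024", "us_east"));  // passes
        whitelist.validateOrFail(List.of("bad/value"));        // throws
    }
}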

Example 2 with HiveFatalException

Use of org.apache.hadoop.hive.ql.metadata.HiveFatalException in project hive by apache.

From the class FileSinkOperator, method getDynOutPaths:

protected FSPaths getDynOutPaths(List<String> row, String lbDirName) throws HiveException {
    FSPaths fp;
    // get the path corresponding to the dynamic partition columns,
    String dpDir = getDynPartDirectory(row, dpColNames);
    String pathKey = null;
    if (dpDir != null) {
        dpDir = appendToSource(lbDirName, dpDir);
        pathKey = dpDir;
        if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
            String buckNum = row.get(row.size() - 1);
            taskId = Utilities.replaceTaskIdFromFilename(taskId, buckNum);
            pathKey = appendToSource(taskId, dpDir);
        }
        FSPaths fsp2 = valToPaths.get(pathKey);
        if (fsp2 == null) {
            // check # of dp
            if (valToPaths.size() > maxPartitions) {
                // we cannot proceed and need to tell the hive client that retries won't succeed either
                throw new HiveFatalException(ErrorMsg.DYNAMIC_PARTITIONS_TOO_MANY_PER_NODE_ERROR.getErrorCodedMsg() + "Maximum was set to " + maxPartitions + " partitions per node" + ", number of dynamic partitions on this node: " + valToPaths.size());
            }
            if (!conf.getDpSortState().equals(DPSortState.NONE) && prevFsp != null) {
                // close the previous fsp as it is no longer needed
                prevFsp.closeWriters(false);
                // since the previous fsp's writers are being closed, try to collect their final
                // stats from the record writer and store them in the previous fsp that is cached
                if (conf.isGatherStats() && isCollectRWStats) {
                    SerDeStats stats = null;
                    if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID) {
                        RecordWriter outWriter = prevFsp.outWriters[0];
                        if (outWriter != null) {
                            stats = ((StatsProvidingRecordWriter) outWriter).getStats();
                        }
                    } else if (prevFsp.updaters[0] != null) {
                        stats = prevFsp.updaters[0].getStats();
                    }
                    if (stats != null) {
                        prevFsp.stat.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize());
                        prevFsp.stat.addToStat(StatsSetupConst.ROW_COUNT, stats.getRowCount());
                    }
                }
                // let writers release the memory for garbage collection
                prevFsp.outWriters[0] = null;
                prevFsp = null;
            }
            fsp2 = createNewPaths(dpDir);
            if (prevFsp == null) {
                prevFsp = fsp2;
            }
            if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
                createBucketForFileIdx(fsp2, 0);
                valToPaths.put(pathKey, fsp2);
            }
        }
        fp = fsp2;
    } else {
        fp = fsp;
    }
    return fp;
}
Also used: SerDeStats (org.apache.hadoop.hive.serde2.SerDeStats), StatsProvidingRecordWriter (org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter), HiveFatalException (org.apache.hadoop.hive.ql.metadata.HiveFatalException)
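
Here HiveFatalException acts as a hard cap on per-node state: once valToPaths has grown past maxPartitions, creating yet another set of writers cannot succeed on retry either, so the operator fails fast. A simplified sketch of that bounded get-or-create pattern follows; the names BoundedPartitionPaths, getOrCreate, and factory are hypothetical stand-ins for Hive's valToPaths map and createNewPaths(), and a plain RuntimeException stands in for HiveFatalException.

import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

// Hypothetical sketch of the "too many dynamic partitions per node" guard in
// getDynOutPaths(): per-partition writer state is created lazily, and once the
// map exceeds the configured limit the operation fails fast, because a retry
// would hit the same limit again.
public class BoundedPartitionPaths<T> {

    private final int maxPartitions;
    private final Map<String, T> valToPaths = new HashMap<>();

    public BoundedPartitionPaths(int maxPartitions) {
        this.maxPartitions = maxPartitions;
    }

    public T getOrCreate(String pathKey, Function<String, T> factory) {
        T existing = valToPaths.get(pathKey);
        if (existing != null) {
            return existing;
        }
        if (valToPaths.size() > maxPartitions) {
            // Stand-in for HiveFatalException with
            // ErrorMsg.DYNAMIC_PARTITIONS_TOO_MANY_PER_NODE_ERROR.
            throw new RuntimeException("Too many dynamic partitions on this node: "
                + valToPaths.size() + " (maximum was set to " + maxPartitions + ")");
        }
        T created = factory.apply(pathKey);
        valToPaths.put(pathKey, created);
        return created;
    }
}

Throwing a fatal rather than a retryable exception is the design choice called out in the source comment ("retries won't succeed either"): the limit is a function of the data being written, so re-running the task would only hit the same wall.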

Aggregations

HiveFatalException (org.apache.hadoop.hive.ql.metadata.HiveFatalException): 2 usages
SerDeStats (org.apache.hadoop.hive.serde2.SerDeStats): 2 usages
IOException (java.io.IOException): 1 usage
StatsProvidingRecordWriter (org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter): 1 usage
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 1 usage
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 1 usage
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 1 usage
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 1 usage
SubStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SubStructObjectInspector): 1 usage
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector): 1 usage