Example 6 with SerDeStats

Use of org.apache.hadoop.hive.serde2.SerDeStats in project hive by apache.

From the class LazySimpleSerDe, method initialize.

/**
   * Initialize the SerDe given the parameters.
   *
   * serialization.format: separator char or byte code (only supports byte values up to 127)
   * columns: ","-separated column names
   * columns.types: ",", ":", or ";"-separated column types
   *
   * @see AbstractSerDe#initialize(Configuration, Properties)
   */
@Override
public void initialize(Configuration job, Properties tbl) throws SerDeException {
    super.initialize(job, tbl);
    serdeParams = new LazySerDeParameters(job, tbl, getClass().getName());
    // Create the ObjectInspectors for the fields
    cachedObjectInspector = LazyFactory.createLazyStructInspector(serdeParams.getColumnNames(), serdeParams.getColumnTypes(), new LazyObjectInspectorParametersImpl(serdeParams));
    cachedLazyStruct = (LazyStruct) LazyFactory.createLazyObject(cachedObjectInspector);
    serializedSize = 0;
    stats = new SerDeStats();
    lastOperationSerialize = false;
    lastOperationDeserialize = false;
}
Also used: SerDeStats (org.apache.hadoop.hive.serde2.SerDeStats), LazyObjectInspectorParametersImpl (org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl)
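
For context, here is a minimal, hedged sketch of driving this initialization from table properties. The property keys are the standard serde2 constants matching the javadoc above; the column names, types, and separator are made-up illustration values.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class LazySimpleSerDeSketch {
    static LazySimpleSerDe buildSerDe() throws SerDeException {
        Properties tbl = new Properties();
        // ","-separated column names and ":"-separated column types, per the javadoc.
        tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name,score");
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string:double");
        // serialization.format: separator char or byte code (here a comma).
        tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, ",");

        LazySimpleSerDe serde = new LazySimpleSerDe();
        serde.initialize(new Configuration(), tbl);

        // The cached object inspector built in initialize() now describes the row struct.
        StructObjectInspector oi = (StructObjectInspector) serde.getObjectInspector();
        return serde;
    }
}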

Example 7 with SerDeStats

Use of org.apache.hadoop.hive.serde2.SerDeStats in project hive by apache.

From the class ColumnarSerDeBase, method initialize.

protected void initialize(int size) throws SerDeException {
    // Pre-create one reusable BytesRefWritable per column and register it in the
    // serialize cache; later rows re-point these buffers instead of reallocating.
    field = new BytesRefWritable[size];
    for (int i = 0; i < size; i++) {
        field[i] = new BytesRefWritable();
        serializeCache.set(i, field[i]);
    }
    serializedSize = 0;
    stats = new SerDeStats();
    lastOperationSerialize = false;
    lastOperationDeserialize = false;
}
Also used: SerDeStats (org.apache.hadoop.hive.serde2.SerDeStats)
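
The loop's purpose is buffer reuse: one BytesRefWritable per column, installed once into the cached row and then re-pointed at new byte ranges on every row. A standalone sketch of the same pattern, with illustrative values:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public class BufferReuseSketch {
    public static void main(String[] args) {
        int size = 3; // illustrative column count
        BytesRefArrayWritable serializeCache = new BytesRefArrayWritable(size);
        BytesRefWritable[] field = new BytesRefWritable[size];
        for (int i = 0; i < size; i++) {
            field[i] = new BytesRefWritable();
            serializeCache.set(i, field[i]); // cache and array share the same objects
        }
        // Per row, each column buffer is re-pointed at a byte range; nothing is reallocated.
        byte[] row = "a,bb,ccc".getBytes(StandardCharsets.UTF_8);
        field[0].set(row, 0, 1); // column 0 -> "a"
        field[1].set(row, 2, 2); // column 1 -> "bb"
        field[2].set(row, 5, 3); // column 2 -> "ccc"
    }
}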

Example 8 with SerDeStats

Use of org.apache.hadoop.hive.serde2.SerDeStats in project hive by apache.

From the class MapOperator, method populateVirtualColumnValues.

public static Object[] populateVirtualColumnValues(ExecMapperContext ctx, List<VirtualColumn> vcs, Object[] vcValues, Deserializer deserializer) {
    if (vcs == null) {
        return vcValues;
    }
    if (vcValues == null) {
        vcValues = new Object[vcs.size()];
    }
    for (int i = 0; i < vcs.size(); i++) {
        switch(vcs.get(i)) {
            case FILENAME:
                if (ctx.inputFileChanged()) {
                    vcValues[i] = new Text(ctx.getCurrentInputPath().toString());
                }
                break;
            case BLOCKOFFSET:
                {
                    long current = ctx.getIoCxt().getCurrentBlockStart();
                    LongWritable old = (LongWritable) vcValues[i];
                    if (old == null) {
                        old = new LongWritable(current);
                        vcValues[i] = old;
                        continue;
                    }
                    if (current != old.get()) {
                        old.set(current);
                    }
                }
                break;
            case ROWOFFSET:
                {
                    long current = ctx.getIoCxt().getCurrentRow();
                    LongWritable old = (LongWritable) vcValues[i];
                    if (old == null) {
                        old = new LongWritable(current);
                        vcValues[i] = old;
                        continue;
                    }
                    if (current != old.get()) {
                        old.set(current);
                    }
                }
                break;
            case RAWDATASIZE:
                {
                    long current = 0L;
                    SerDeStats stats = deserializer.getSerDeStats();
                    if (stats != null) {
                        current = stats.getRawDataSize();
                    }
                    LongWritable old = (LongWritable) vcValues[i];
                    if (old == null) {
                        old = new LongWritable(current);
                        vcValues[i] = old;
                        continue;
                    }
                    if (current != old.get()) {
                        old.set(current);
                    }
                }
                break;
            case ROWID:
                if (ctx.getIoCxt().getRecordIdentifier() == null) {
                    vcValues[i] = null;
                } else {
                    if (vcValues[i] == null) {
                        vcValues[i] = new Object[RecordIdentifier.Field.values().length];
                    }
                    RecordIdentifier.StructInfo.toArray(ctx.getIoCxt().getRecordIdentifier(), (Object[]) vcValues[i]);
                    // Clear the identifier so we don't accidentally cache the value; this
                    // shouldn't happen since the IO layer either knows how to produce
                    // ROW__ID or not, but to be safe.
                    ctx.getIoCxt().setRecordIdentifier(null);
                }
                break;
        }
    }
    return vcValues;
}
Also used: SerDeStats (org.apache.hadoop.hive.serde2.SerDeStats), Text (org.apache.hadoop.io.Text), LongWritable (org.apache.hadoop.io.LongWritable)
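
Of these branches, RAWDATASIZE is the one that touches SerDeStats: after each record it asks the deserializer for fresh stats. A hedged sketch of the deserializer side of that contract; this is a toy class for illustration, not anything in Hive:

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

// Hypothetical deserializer that maintains the SerDeStats read back by the
// RAWDATASIZE branch of populateVirtualColumnValues.
public class CountingDeserializer implements Deserializer {
    private final SerDeStats stats = new SerDeStats();

    @Override
    public void initialize(Configuration conf, Properties tbl) {
    }

    @Override
    public Object deserialize(Writable blob) {
        Text row = (Text) blob;
        // Accumulate the raw (uncompressed) byte count of rows seen so far.
        stats.setRawDataSize(stats.getRawDataSize() + row.getLength());
        return row.toString();
    }

    @Override
    public ObjectInspector getObjectInspector() {
        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    }

    @Override
    public SerDeStats getSerDeStats() {
        return stats; // read by MapOperator.populateVirtualColumnValues
    }
}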

Example 9 with SerDeStats

Use of org.apache.hadoop.hive.serde2.SerDeStats in project hive by apache.

From the class OrcRecordUpdater, method getStats.

@Override
public SerDeStats getStats() {
    SerDeStats stats = new SerDeStats();
    stats.setRowCount(rowCountDelta);
    // Don't worry about setting the raw data size diff; there's no way to calculate that
    // without finding the row we are updating or deleting, which would be a mess.
    return stats;
}
Also used: SerDeStats (org.apache.hadoop.hive.serde2.SerDeStats)
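
A hedged sketch of the consuming side, folding these numbers into a running total the way FileSinkOperator (Example 10 below) folds them into its per-path stat objects; the helper itself is hypothetical:

import org.apache.hadoop.hive.ql.io.RecordUpdater;
import org.apache.hadoop.hive.serde2.SerDeStats;

public class StatsAccumulatorSketch {
    // Hypothetical helper: merge an updater's stats into a running total.
    static void accumulate(RecordUpdater updater, SerDeStats total) {
        SerDeStats stats = updater.getStats(); // e.g. the OrcRecordUpdater above
        if (stats != null) {
            total.setRowCount(total.getRowCount() + stats.getRowCount());
            total.setRawDataSize(total.getRawDataSize() + stats.getRawDataSize());
        }
    }
}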

Example 10 with SerDeStats

Use of org.apache.hadoop.hive.serde2.SerDeStats in project hive by apache.

From the class FileSinkOperator, method getDynOutPaths.

protected FSPaths getDynOutPaths(List<String> row, String lbDirName) throws HiveException {
    FSPaths fp;
    // get the path corresponding to the dynamic partition columns
    String dpDir = getDynPartDirectory(row, dpColNames);
    String pathKey = null;
    if (dpDir != null) {
        dpDir = appendToSource(lbDirName, dpDir);
        pathKey = dpDir;
        if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
            String buckNum = row.get(row.size() - 1);
            taskId = Utilities.replaceTaskIdFromFilename(taskId, buckNum);
            pathKey = appendToSource(taskId, dpDir);
        }
        FSPaths fsp2 = valToPaths.get(pathKey);
        if (fsp2 == null) {
            // check the number of dynamic partitions created so far
            if (valToPaths.size() > maxPartitions) {
                // we cannot proceed and need to tell the hive client that retries won't succeed either
                throw new HiveFatalException(ErrorMsg.DYNAMIC_PARTITIONS_TOO_MANY_PER_NODE_ERROR.getErrorCodedMsg() + "Maximum was set to " + maxPartitions + " partitions per node" + ", number of dynamic partitions on this node: " + valToPaths.size());
            }
            if (!conf.getDpSortState().equals(DPSortState.NONE) && prevFsp != null) {
                // close the previous fsp as it is no longer needed
                prevFsp.closeWriters(false);
                // since we are closing the previous fsp's record writers, see if we can get the
                // stats from the record writer and store them in the previous fsp that is cached
                if (conf.isGatherStats() && isCollectRWStats) {
                    SerDeStats stats = null;
                    if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID) {
                        RecordWriter outWriter = prevFsp.outWriters[0];
                        if (outWriter != null) {
                            stats = ((StatsProvidingRecordWriter) outWriter).getStats();
                        }
                    } else if (prevFsp.updaters[0] != null) {
                        stats = prevFsp.updaters[0].getStats();
                    }
                    if (stats != null) {
                        prevFsp.stat.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize());
                        prevFsp.stat.addToStat(StatsSetupConst.ROW_COUNT, stats.getRowCount());
                    }
                }
                // let writers release the memory for garbage collection
                prevFsp.outWriters[0] = null;
                prevFsp = null;
            }
            fsp2 = createNewPaths(dpDir);
            if (prevFsp == null) {
                prevFsp = fsp2;
            }
            if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
                createBucketForFileIdx(fsp2, 0);
                valToPaths.put(pathKey, fsp2);
            }
        }
        fp = fsp2;
    } else {
        fp = fsp;
    }
    return fp;
}
Also used: SerDeStats (org.apache.hadoop.hive.serde2.SerDeStats), StatsProvidingRecordWriter (org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter), HiveFatalException (org.apache.hadoop.hive.ql.metadata.HiveFatalException)
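
The NOT_ACID branch above only works because the writer implements StatsProvidingRecordWriter. A minimal sketch of that contract with a toy writer; this is illustrative, not a real Hive writer:

import java.io.IOException;
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

// Hypothetical writer that exposes SerDeStats so FileSinkOperator can harvest
// row count and raw data size when it closes the writer.
public class StatsCountingWriter implements StatsProvidingRecordWriter {
    private final SerDeStats stats = new SerDeStats();

    @Override
    public void write(Writable w) throws IOException {
        // Track row count and raw (uncompressed) size as rows arrive.
        stats.setRowCount(stats.getRowCount() + 1);
        if (w instanceof Text) {
            stats.setRawDataSize(stats.getRawDataSize() + ((Text) w).getLength());
        }
        // A real writer would persist w here.
    }

    @Override
    public void close(boolean abort) throws IOException {
        // A real writer would flush and release resources here.
    }

    @Override
    public SerDeStats getStats() {
        return stats; // harvested when conf.isGatherStats() && isCollectRWStats
    }
}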

Aggregations

SerDeStats (org.apache.hadoop.hive.serde2.SerDeStats): 10 usages
IOException (java.io.IOException): 2 usages
StatsProvidingRecordWriter (org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter): 2 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2 usages
HiveFatalException (org.apache.hadoop.hive.ql.metadata.HiveFatalException): 2 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 2 usages
LazyObjectInspectorParametersImpl (org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl): 1 usage
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 1 usage
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 1 usage
SubStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SubStructObjectInspector): 1 usage
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector): 1 usage
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 1 usage
LongWritable (org.apache.hadoop.io.LongWritable): 1 usage
Text (org.apache.hadoop.io.Text): 1 usage