Use of org.apache.hadoop.hive.serde2.SerDeStats in project Hive by Apache.
Class LazySimpleSerDe, method initialize.
/**
 * Initialize the SerDe given the parameters.
 *   serialization.format: separator char or byte code (only supports byte values up to 127)
 *   columns: ","-separated column names
 *   columns.types: ","-, ":"-, or ";"-separated column types
 *
 * @see AbstractSerDe#initialize(Configuration, Properties)
 */
@Override
public void initialize(Configuration job, Properties tbl) throws SerDeException {
  super.initialize(job, tbl);
  serdeParams = new LazySerDeParameters(job, tbl, getClass().getName());
  // Create the ObjectInspectors for the fields
  cachedObjectInspector = LazyFactory.createLazyStructInspector(serdeParams.getColumnNames(),
      serdeParams.getColumnTypes(), new LazyObjectInspectorParametersImpl(serdeParams));
  cachedLazyStruct = (LazyStruct) LazyFactory.createLazyObject(cachedObjectInspector);
  serializedSize = 0;
  stats = new SerDeStats();
  lastOperationSerialize = false;
  lastOperationDeserialize = false;
}
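The Javadoc above lists the table properties LazySimpleSerDe understands. The following is a minimal, hypothetical sketch of wiring them up by hand; the property keys come from serdeConstants, while the separator value, column names, and column types are purely illustrative and not taken from the snippet above.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

public class LazySimpleSerDeInitSketch {
  public static void main(String[] args) throws SerDeException {
    Properties tbl = new Properties();
    // serialization.format: separator byte code (9 = tab); only byte values up to 127 are supported
    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
    // columns: ","-separated column names (illustrative)
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name,amount");
    // columns.types: ":"-separated column types (illustrative)
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string:double");

    LazySimpleSerDe serde = new LazySimpleSerDe();
    serde.initialize(new Configuration(), tbl);
    // After serialize()/deserialize() calls, serde.getSerDeStats() reports the raw data size
    // accumulated in serializedSize, which the initialize method above resets to 0.
  }
}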
Use of org.apache.hadoop.hive.serde2.SerDeStats in project Hive by Apache.
Class ColumnarSerDeBase, method initialize.
protected void initialize(int size) throws SerDeException {
  field = new BytesRefWritable[size];
  for (int i = 0; i < size; i++) {
    field[i] = new BytesRefWritable();
    serializeCache.set(i, field[i]);
  }
  serializedSize = 0;
  stats = new SerDeStats();
  lastOperationSerialize = false;
  lastOperationDeserialize = false;
}
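Both initialize methods above reset serializedSize, stats, and the lastOperation* flags; the usual counterpart in these SerDes reports the accumulated size through getSerDeStats(). The method below is a sketch of that typical pattern, assuming the same fields as above, not an excerpt from ColumnarSerDeBase itself.

@Override
public SerDeStats getSerDeStats() {
  // Exactly one of serialize()/deserialize() should have run since initialization,
  // and serializedSize holds the number of bytes it processed.
  assert (lastOperationSerialize != lastOperationDeserialize);
  stats.setRawDataSize(serializedSize);
  return stats;
}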
Use of org.apache.hadoop.hive.serde2.SerDeStats in project Hive by Apache.
Class MapOperator, method populateVirtualColumnValues.
public static Object[] populateVirtualColumnValues(ExecMapperContext ctx, List<VirtualColumn> vcs,
    Object[] vcValues, Deserializer deserializer) {
  if (vcs == null) {
    return vcValues;
  }
  if (vcValues == null) {
    vcValues = new Object[vcs.size()];
  }
  for (int i = 0; i < vcs.size(); i++) {
    switch (vcs.get(i)) {
      case FILENAME:
        if (ctx.inputFileChanged()) {
          vcValues[i] = new Text(ctx.getCurrentInputPath().toString());
        }
        break;
      case BLOCKOFFSET: {
        long current = ctx.getIoCxt().getCurrentBlockStart();
        LongWritable old = (LongWritable) vcValues[i];
        if (old == null) {
          old = new LongWritable(current);
          vcValues[i] = old;
          continue;
        }
        if (current != old.get()) {
          old.set(current);
        }
      }
        break;
      case ROWOFFSET: {
        long current = ctx.getIoCxt().getCurrentRow();
        LongWritable old = (LongWritable) vcValues[i];
        if (old == null) {
          old = new LongWritable(current);
          vcValues[i] = old;
          continue;
        }
        if (current != old.get()) {
          old.set(current);
        }
      }
        break;
      case RAWDATASIZE:
        long current = 0L;
        SerDeStats stats = deserializer.getSerDeStats();
        if (stats != null) {
          current = stats.getRawDataSize();
        }
        LongWritable old = (LongWritable) vcValues[i];
        if (old == null) {
          old = new LongWritable(current);
          vcValues[i] = old;
          continue;
        }
        if (current != old.get()) {
          old.set(current);
        }
        break;
      case ROWID:
        if (ctx.getIoCxt().getRecordIdentifier() == null) {
          vcValues[i] = null;
        } else {
          if (vcValues[i] == null) {
            vcValues[i] = new Object[RecordIdentifier.Field.values().length];
          }
          RecordIdentifier.StructInfo.toArray(ctx.getIoCxt().getRecordIdentifier(), (Object[]) vcValues[i]);
          // so we don't accidentally cache the value; shouldn't happen since the IO layer
          // either knows how to produce ROW__ID or not - but to be safe
          ctx.getIoCxt().setRecordIdentifier(null);
        }
        break;
    }
  }
  return vcValues;
}
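The RAWDATASIZE branch is the only place in this method that touches SerDeStats: after each record is deserialized, the Deserializer is asked for its stats and the raw data size is exposed as a virtual column. The helper below is a hypothetical, self-contained restatement of that branch (the class and method names are illustrative, not part of MapOperator).

import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.io.LongWritable;

public class RawDataSizeSketch {
  // Mirrors the RAWDATASIZE case: reuse the LongWritable already stored in the
  // virtual-column slot and only update it when the reported size changes.
  static LongWritable rawDataSizeValue(Deserializer deserializer, LongWritable previous) {
    long current = 0L;
    SerDeStats stats = deserializer.getSerDeStats(); // may be null for SerDes that track no stats
    if (stats != null) {
      current = stats.getRawDataSize();
    }
    if (previous == null) {
      return new LongWritable(current);
    }
    if (current != previous.get()) {
      previous.set(current);
    }
    return previous;
  }
}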
Use of org.apache.hadoop.hive.serde2.SerDeStats in project Hive by Apache.
Class OrcRecordUpdater, method getStats.
@Override
public SerDeStats getStats() {
  SerDeStats stats = new SerDeStats();
  stats.setRowCount(rowCountDelta);
  // Don't set a raw data size diff: it can't be computed
  // without finding the row we are updating or deleting, which would be a mess.
  return stats;
}
Use of org.apache.hadoop.hive.serde2.SerDeStats in project Hive by Apache.
Class FileSinkOperator, method getDynOutPaths.
protected FSPaths getDynOutPaths(List<String> row, String lbDirName) throws HiveException {
  FSPaths fp;
  // get the path corresponding to the dynamic partition columns
  String dpDir = getDynPartDirectory(row, dpColNames);
  String pathKey = null;
  if (dpDir != null) {
    dpDir = appendToSource(lbDirName, dpDir);
    pathKey = dpDir;
    if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
      String buckNum = row.get(row.size() - 1);
      taskId = Utilities.replaceTaskIdFromFilename(taskId, buckNum);
      pathKey = appendToSource(taskId, dpDir);
    }
    FSPaths fsp2 = valToPaths.get(pathKey);
    if (fsp2 == null) {
      // check the number of dynamic partitions created so far on this node
      if (valToPaths.size() > maxPartitions) {
        // we cannot proceed and need to tell the hive client that retries won't succeed either
        throw new HiveFatalException(ErrorMsg.DYNAMIC_PARTITIONS_TOO_MANY_PER_NODE_ERROR.getErrorCodedMsg()
            + "Maximum was set to " + maxPartitions + " partitions per node"
            + ", number of dynamic partitions on this node: " + valToPaths.size());
      }
      if (!conf.getDpSortState().equals(DPSortState.NONE) && prevFsp != null) {
        // close the previous fsp as it is no longer needed
        prevFsp.closeWriters(false);
        // since the previous fsp's record writers are being closed, collect the final
        // stats from the record writer and store them in the previous fsp that is cached
        if (conf.isGatherStats() && isCollectRWStats) {
          SerDeStats stats = null;
          if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID) {
            RecordWriter outWriter = prevFsp.outWriters[0];
            if (outWriter != null) {
              stats = ((StatsProvidingRecordWriter) outWriter).getStats();
            }
          } else if (prevFsp.updaters[0] != null) {
            stats = prevFsp.updaters[0].getStats();
          }
          if (stats != null) {
            prevFsp.stat.addToStat(StatsSetupConst.RAW_DATA_SIZE, stats.getRawDataSize());
            prevFsp.stat.addToStat(StatsSetupConst.ROW_COUNT, stats.getRowCount());
          }
        }
        // let writers release the memory for garbage collection
        prevFsp.outWriters[0] = null;
        prevFsp = null;
      }
      fsp2 = createNewPaths(dpDir);
      if (prevFsp == null) {
        prevFsp = fsp2;
      }
      if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
        createBucketForFileIdx(fsp2, 0);
        valToPaths.put(pathKey, fsp2);
      }
    }
    fp = fsp2;
  } else {
    fp = fsp;
  }
  return fp;
}
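In the NOT_ACID branch above, the stats come from a writer implementing StatsProvidingRecordWriter, a FileSinkOperator.RecordWriter that also exposes SerDeStats. The class below is a hypothetical, minimal writer along those lines; the class name, the delegate wrapping, and the byte accounting are illustrative assumptions, not taken from Hive's writers.

import java.io.IOException;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

// Hypothetical writer that wraps another RecordWriter and tracks row count and an
// approximate raw data size, so FileSinkOperator can pull them via getStats() at close time.
public class CountingRecordWriter implements StatsProvidingRecordWriter {
  private final FileSinkOperator.RecordWriter delegate;
  private final SerDeStats stats = new SerDeStats();

  public CountingRecordWriter(FileSinkOperator.RecordWriter delegate) {
    this.delegate = delegate;
  }

  @Override
  public void write(Writable w) throws IOException {
    delegate.write(w);
    stats.setRowCount(stats.getRowCount() + 1);
    if (w instanceof Text) {
      // crude size estimate; real writers derive this from their own serialization
      stats.setRawDataSize(stats.getRawDataSize() + ((Text) w).getLength());
    }
  }

  @Override
  public void close(boolean abort) throws IOException {
    delegate.close(abort);
  }

  @Override
  public SerDeStats getStats() {
    return stats;
  }
}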