Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
Class SparkReduceRecordHandler, method init.
@Override
@SuppressWarnings("unchecked")
public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception {
  perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
  super.init(job, output, reporter);
  rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
  ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
  ObjectInspector keyObjectInspector;
  ReduceWork gWork = Utilities.getReduceWork(job);
  reducer = gWork.getReducer();
  vectorized = gWork.getVectorMode();
  // clear out any parents as reducer is the root
  reducer.setParentOperators(null);
  batchContext = gWork.getVectorizedRowBatchCtx();
  isTagged = gWork.getNeedsTagging();
  try {
    keyTableDesc = gWork.getKeyDesc();
    inputKeySerDe = ReflectionUtils.newInstance(keyTableDesc.getSerDeClass(), null);
    inputKeySerDe.initialize(null, keyTableDesc.getProperties(), null);
    keyObjectInspector = inputKeySerDe.getObjectInspector();
    valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()];
    if (vectorized) {
      final int maxTags = gWork.getTagToValueDesc().size();
      // CONSIDER: Cleaning up this code and eliminating the arrays. Vectorization only handles
      // one operator tree.
      Preconditions.checkState(maxTags == 1);
      keyStructInspector = (StructObjectInspector) keyObjectInspector;
      firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
      buffer = new DataOutputBuffer();
    }
    for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
      // We should initialize the SerDe with the TypeInfo when available.
      valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
      AbstractSerDe inputValueSerDe = ReflectionUtils.newInstance(valueTableDesc[tag].getSerDeClass(), null);
      inputValueSerDe.initialize(null, valueTableDesc[tag].getProperties(), null);
      inputValueDeserializer[tag] = inputValueSerDe;
      valueObjectInspector[tag] = inputValueSerDe.getObjectInspector();
      ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
      if (vectorized) {
        /* vectorization only works with struct object inspectors */
        valueStructInspector = (StructObjectInspector) valueObjectInspector[tag];
        final int totalColumns = firstValueColumnOffset + valueStructInspector.getAllStructFieldRefs().size();
        rowObjectInspector[tag] = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspector);
        batch = gWork.getVectorizedRowBatchCtx().createVectorizedRowBatch();
        // Setup vectorized deserialization for the key and value.
        BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeySerDe;
        keyBinarySortableDeserializeToRow = new VectorDeserializeRow<BinarySortableDeserializeRead>(
            new BinarySortableDeserializeRead(
                VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
                (batchContext.getRowdataTypePhysicalVariations().length > firstValueColumnOffset)
                    ? Arrays.copyOfRange(batchContext.getRowdataTypePhysicalVariations(), 0, firstValueColumnOffset)
                    : batchContext.getRowdataTypePhysicalVariations(),
                /* useExternalBuffer */ true,
                binarySortableSerDe.getSortOrders(),
                binarySortableSerDe.getNullMarkers(),
                binarySortableSerDe.getNotNullMarkers()));
        keyBinarySortableDeserializeToRow.init(0);
        final int valuesSize = valueStructInspector.getAllStructFieldRefs().size();
        if (valuesSize > 0) {
          valueLazyBinaryDeserializeToRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(
              new LazyBinaryDeserializeRead(
                  VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspector),
                  (batchContext.getRowdataTypePhysicalVariations().length >= totalColumns)
                      ? Arrays.copyOfRange(batchContext.getRowdataTypePhysicalVariations(), firstValueColumnOffset, totalColumns)
                      : null,
                  /* useExternalBuffer */ true));
          valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
          // Create data buffers for value bytes column vectors.
          for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
            ColumnVector colVector = batch.cols[i];
            if (colVector instanceof BytesColumnVector) {
              BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
              bytesColumnVector.initBuffer();
            }
          }
        }
      } else {
        ois.add(keyObjectInspector);
        ois.add(valueObjectInspector[tag]);
        // reducer.setGroupKeyObjectInspector(keyObjectInspector);
        rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
      }
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  ExecMapperContext execContext = new ExecMapperContext(job);
  localWork = gWork.getMapRedLocalWork();
  execContext.setJc(jc);
  execContext.setLocalWork(localWork);
  reducer.passExecContext(execContext);
  reducer.setReporter(rp);
  OperatorUtils.setChildrenCollector(Arrays.<Operator<? extends OperatorDesc>>asList(reducer), output);
  // initialize reduce operator tree
  try {
    LOG.info(reducer.dump(0));
    reducer.initialize(jc, rowObjectInspector);
    if (localWork != null) {
      for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
        dummyOp.setExecContext(execContext);
        dummyOp.initialize(jc, null);
      }
    }
  } catch (Throwable e) {
    abort = true;
    if (e instanceof OutOfMemoryError) {
      // Don't create a new object if we are already out of memory
      throw (OutOfMemoryError) e;
    } else {
      throw new RuntimeException("Reduce operator initialization failed", e);
    }
  }
  perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
}
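The non-vectorized branch above follows a compact pattern: create the key and value SerDes from their TableDescs via ReflectionUtils, initialize each with its table properties, and combine the resulting ObjectInspectors into the struct inspector passed to the reducer. Below is a minimal sketch of that pattern for a single key/value pair; the class name is illustrative, and "KEY"/"VALUE" stand in for Utilities.reduceFieldNameList.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.util.ReflectionUtils;

public class ReduceRowInspectorSketch {
  // Build the (key, value) row inspector the way the else-branch above does,
  // but for a single key/value TableDesc pair.
  public static StructObjectInspector buildRowInspector(TableDesc keyDesc, TableDesc valueDesc)
      throws SerDeException {
    AbstractSerDe keySerDe = ReflectionUtils.newInstance(keyDesc.getSerDeClass(), null);
    keySerDe.initialize(null, keyDesc.getProperties(), null);

    AbstractSerDe valueSerDe = ReflectionUtils.newInstance(valueDesc.getSerDeClass(), null);
    valueSerDe.initialize(null, valueDesc.getProperties(), null);

    List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
    ois.add(keySerDe.getObjectInspector());
    ois.add(valueSerDe.getObjectInspector());
    // "KEY" and "VALUE" stand in for Utilities.reduceFieldNameList.
    return ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("KEY", "VALUE"), ois);
  }
}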
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
Class SemanticAnalyzer, method genConvertCol.
private List<ExprNodeDesc> genConvertCol(String dest, QB qb, TableDesc tableDesc, Operator input,
    List<Integer> posns, boolean convert) throws SemanticException {
  StructObjectInspector oi = null;
  try {
    AbstractSerDe deserializer = tableDesc.getSerDeClass().newInstance();
    deserializer.initialize(conf, tableDesc.getProperties(), null);
    oi = (StructObjectInspector) deserializer.getObjectInspector();
  } catch (Exception e) {
    throw new SemanticException(e);
  }
  List<? extends StructField> tableFields = oi.getAllStructFieldRefs();
  List<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver().getColumnInfos();
  // Check column type
  int columnNumber = posns.size();
  List<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(columnNumber);
  for (Integer posn : posns) {
    ObjectInspector tableFieldOI = tableFields.get(posn).getFieldObjectInspector();
    TypeInfo tableFieldTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(tableFieldOI);
    TypeInfo rowFieldTypeInfo = rowFields.get(posn).getType();
    ExprNodeDesc column = new ExprNodeColumnDesc(rowFieldTypeInfo, rowFields.get(posn).getInternalName(),
        rowFields.get(posn).getTabAlias(), rowFields.get(posn).getIsVirtualCol());
    if (convert && !tableFieldTypeInfo.equals(rowFieldTypeInfo)) {
      // need to do some conversions here
      if (tableFieldTypeInfo.getCategory() != Category.PRIMITIVE) {
        // cannot convert to complex types
        column = null;
      } else {
        column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
            .createConversionCast(column, (PrimitiveTypeInfo) tableFieldTypeInfo);
      }
      if (column == null) {
        String reason = "Cannot convert column " + posn + " from " + rowFieldTypeInfo + " to "
            + tableFieldTypeInfo + ".";
        throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.TARGET_TABLE_COLUMN_MISMATCH.getMsg(),
            qb.getParseInfo().getDestForClause(dest), reason));
      }
    }
    expressions.add(column);
  }
  return expressions;
}
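The per-position type check above compares the table-side field types, taken from the SerDe's StructObjectInspector, against the row-resolver types. The following is a hypothetical helper (not part of SemanticAnalyzer) showing the same field-OI-to-TypeInfo lookup in isolation.

import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TableFieldTypesSketch {
  // Mirror the lookup genConvertCol performs for each position:
  // field ObjectInspector -> TypeInfo.
  public static void printFieldTypes(StructObjectInspector oi) {
    List<? extends StructField> fields = oi.getAllStructFieldRefs();
    for (StructField field : fields) {
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector());
      System.out.println(field.getFieldName() + " : " + typeInfo.getTypeName());
    }
  }
}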
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
Class PTFTranslator, method setupShape.
private ShapeDetails setupShape(StructObjectInspector OI, List<String> columnNames, RowResolver rr)
    throws SemanticException {
  Map<String, String> serdePropsMap = new LinkedHashMap<String, String>();
  AbstractSerDe serde = null;
  ShapeDetails shp = new ShapeDetails();
  try {
    serde = PTFTranslator.createLazyBinarySerDe(hCfg, OI, serdePropsMap);
    StructObjectInspector outOI = PTFPartition.setupPartitionOutputOI(serde, OI);
    shp.setOI(outOI);
  } catch (SerDeException se) {
    throw new SemanticException(se);
  }
  shp.setRr(rr);
  shp.setSerde(serde);
  shp.setSerdeClassName(serde.getClass().getName());
  shp.setSerdeProps(serdePropsMap);
  shp.setColumnNames(columnNames);
  TypeCheckCtx tCtx = new TypeCheckCtx(rr);
  tCtx.setUnparseTranslator(unparseT);
  shp.setTypeCheckCtx(tCtx);
  return shp;
}
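ShapeDetails records the SerDe class name and property map alongside the SerDe itself, which allows the same shape to be reconstructed later from just that recorded information. Below is a minimal sketch of such a reconstruction; the helper class, its name, and its signature are illustrative, not Hive API.

import java.util.Map;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.util.ReflectionUtils;

public class ShapeSerDeRebuildSketch {
  // Rebuild an AbstractSerDe from the class name and serde properties that a
  // ShapeDetails-style object carries, re-initializing it against the given
  // configuration.
  public static AbstractSerDe rebuildSerDe(String serdeClassName, Map<String, String> serdeProps,
      Configuration conf) throws ClassNotFoundException, SerDeException {
    Class<? extends AbstractSerDe> serdeClass =
        Class.forName(serdeClassName).asSubclass(AbstractSerDe.class);
    AbstractSerDe serde = ReflectionUtils.newInstance(serdeClass, conf);
    Properties props = new Properties();
    props.putAll(serdeProps);
    serde.initialize(conf, props, null);
    return serde;
  }
}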
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
Class MapJoinTestConfig, method createMapJoinTableContainerSerDe.
public static MapJoinTableContainerSerDe createMapJoinTableContainerSerDe(MapJoinDesc mapJoinDesc)
    throws SerDeException {
  final Byte smallTablePos = 1;
  TableDesc keyTableDesc = mapJoinDesc.getKeyTblDesc();
  AbstractSerDe keySerializer = (AbstractSerDe) ReflectionUtil.newInstance(BinarySortableSerDe.class, null);
  keySerializer.initialize(null, keyTableDesc.getProperties(), null);
  MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerializer, false);
  final List<TableDesc> valueTableDescList;
  if (mapJoinDesc.getNoOuterJoin()) {
    valueTableDescList = mapJoinDesc.getValueTblDescs();
  } else {
    valueTableDescList = mapJoinDesc.getValueFilteredTblDescs();
  }
  TableDesc valueTableDesc = valueTableDescList.get(smallTablePos);
  AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance(valueTableDesc.getSerDeClass(), null);
  valueSerDe.initialize(null, valueTableDesc.getProperties(), null);
  MapJoinObjectSerDeContext valueContext =
      new MapJoinObjectSerDeContext(valueSerDe, hasFilter(mapJoinDesc, smallTablePos));
  MapJoinTableContainerSerDe mapJoinTableContainerSerDe = new MapJoinTableContainerSerDe(keyContext, valueContext);
  return mapJoinTableContainerSerDe;
}
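The key side above always uses a BinarySortableSerDe initialized from the key TableDesc's properties with a null configuration. Here is a standalone sketch of the same initialization against hand-built properties; the single column name and type are illustrative.

import java.util.Properties;

import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
import org.apache.hadoop.hive.serde2.serdeConstants;

public class MapJoinKeySerDeSketch {
  // Initialize a BinarySortableSerDe for a single int join key using the same
  // three-argument initialize() call applied to keySerializer above.
  public static AbstractSerDe createKeySerDe() throws SerDeException {
    Properties keyProps = new Properties();
    keyProps.setProperty(serdeConstants.LIST_COLUMNS, "joinkey");
    keyProps.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int");
    AbstractSerDe keySerDe = new BinarySortableSerDe();
    keySerDe.initialize(null, keyProps, null);
    return keySerDe;
  }
}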
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
Class DynamicPartitionFileRecordWriterContainer, method getLocalFileWriter.
@Override
protected LocalFileWriter getLocalFileWriter(HCatRecord value) throws IOException, HCatException {
  OutputJobInfo localJobInfo = null;
  // Calculate which writer to use from the remaining values - this needs to
  // be done before we delete cols.
  List<String> dynamicPartValues = new ArrayList<String>();
  for (Integer colToAppend : dynamicPartCols) {
    Object partitionValue = value.get(colToAppend);
    dynamicPartValues.add(partitionValue == null ? HIVE_DEFAULT_PARTITION_VALUE : partitionValue.toString());
  }
  String dynKey = dynamicPartValues.toString();
  if (!baseDynamicWriters.containsKey(dynKey)) {
    if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) {
      throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
          "Number of dynamic partitions being created "
              + "exceeds configured max allowable partitions[" + maxDynamicPartitions
              + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
              + "] if needed.");
    }
    org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil.createTaskAttemptContext(context);
    configureDynamicStorageHandler(currTaskContext, dynamicPartValues);
    localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext.getConfiguration());
    // Setup serDe.
    AbstractSerDe currSerDe =
        ReflectionUtils.newInstance(storageHandler.getSerDeClass(), currTaskContext.getJobConf());
    try {
      InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo);
    } catch (SerDeException e) {
      throw new IOException("Failed to initialize SerDe", e);
    }
    // create base OutputFormat
    org.apache.hadoop.mapred.OutputFormat baseOF =
        ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf());
    // We are skipping calling checkOutputSpecs() for each partition
    // As it can throw a FileAlreadyExistsException when more than one
    // mapper is writing to a partition.
    // See HCATALOG-490, also to avoid contacting the namenode for each new
    // FileOutputFormat instance.
    // In general this should be ok for most FileOutputFormat implementations
    // but may become an issue for cases when the method is used to perform
    // other setup tasks.
    // Get Output Committer
    org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf().getOutputCommitter();
    // Create currJobContext the latest so it gets all the config changes
    org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil.createJobContext(currTaskContext);
    // Set up job.
    baseOutputCommitter.setupJob(currJobContext);
    // Recreate to refresh jobConf of currTask context.
    currTaskContext = HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(),
        currTaskContext.getTaskAttemptID(), currTaskContext.getProgressible());
    // Set temp location.
    currTaskContext.getConfiguration().set("mapred.work.output.dir",
        new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath().toString());
    // Set up task.
    baseOutputCommitter.setupTask(currTaskContext);
    Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir"));
    Path childPath = new Path(parentDir, FileOutputFormat.getUniqueFile(currTaskContext,
        currTaskContext.getConfiguration().get("mapreduce.output.basename", "part"), ""));
    RecordWriter baseRecordWriter = baseOF.getRecordWriter(parentDir.getFileSystem(currTaskContext.getConfiguration()),
        currTaskContext.getJobConf(), childPath.toString(), InternalUtil.createReporter(currTaskContext));
    baseDynamicWriters.put(dynKey, baseRecordWriter);
    baseDynamicSerDe.put(dynKey, currSerDe);
    baseDynamicCommitters.put(dynKey, baseOutputCommitter);
    dynamicContexts.put(dynKey, currTaskContext);
    dynamicObjectInspectors.put(dynKey, InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()));
    dynamicOutputJobInfo.put(dynKey, HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey).getConfiguration()));
  }
  return new LocalFileWriter(baseDynamicWriters.get(dynKey), dynamicObjectInspectors.get(dynKey),
      baseDynamicSerDe.get(dynKey), dynamicOutputJobInfo.get(dynKey));
}
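Once a partition's SerDe, ObjectInspector, and base RecordWriter are cached under dynKey, emitting a record reduces to a serialize-then-write call. The following is a minimal sketch of that step, assuming a raw mapred RecordWriter; the class, method, and the null key are illustrative and not the actual LocalFileWriter API.

import java.io.IOException;

import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.RecordWriter;

public class PartitionWriteSketch {
  // Serialize one row with the partition's SerDe and hand it to the cached
  // base RecordWriter; a null key is used here purely for illustration.
  public static void writeRow(Object row, AbstractSerDe serde, StructObjectInspector rowInspector,
      RecordWriter<Writable, Writable> baseWriter) throws SerDeException, IOException {
    Writable serialized = serde.serialize(row, rowInspector);
    baseWriter.write(null, serialized);
  }
}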