Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
From the class SemanticAnalyzer, method createNewGroupingKey.
/*
* Create a new grouping key for grouping id.
* A dummy grouping id is added. At runtime, the group by operator
* creates 'n' rows per input row, where 'n' is the number of grouping sets.
*/
private void createNewGroupingKey(List<ExprNodeDesc> groupByKeys, List<String> outputColumnNames, RowResolver groupByOutputRowResolver, Map<String, ExprNodeDesc> colExprMap) {
// The value for the constant does not matter. It is replaced by the grouping set
// value for the actual implementation
ExprNodeConstantDesc constant = new ExprNodeConstantDesc(VirtualColumn.GROUPINGID.getTypeInfo(), 0L);
groupByKeys.add(constant);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), new ColumnInfo(field, VirtualColumn.GROUPINGID.getTypeInfo(), null, true));
colExprMap.put(field, constant);
}
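For illustration only, a stand-alone sketch (not Hive code) of the runtime behavior the comment above describes: with 'n' grouping sets, each input row is emitted 'n' times and tagged with a grouping id, with keys outside the active set nulled out. The grouping id shown is a plain index; Hive's actual grouping__id is a bit vector over the grouping keys.

// Stand-alone sketch (illustrative only, not Hive code): n grouping sets => n output
// rows per input row; the grouping id here is a simple index for readability.
import java.util.*;

class GroupingSetExpansionSketch {
    public static void main(String[] args) {
        List<List<String>> groupingSets = new ArrayList<>();
        groupingSets.add(Arrays.asList("a", "b"));   // GROUPING SETS ((a, b), (a), ())
        groupingSets.add(Arrays.asList("a"));
        groupingSets.add(new ArrayList<>());
        Map<String, String> inputRow = new LinkedHashMap<>();
        inputRow.put("a", "1");
        inputRow.put("b", "2");
        for (int groupingId = 0; groupingId < groupingSets.size(); groupingId++) {
            Map<String, String> outputRow = new LinkedHashMap<>();
            for (String key : inputRow.keySet()) {
                // keys outside the active grouping set are nulled out
                outputRow.put(key, groupingSets.get(groupingId).contains(key) ? inputRow.get(key) : null);
            }
            System.out.println("grouping id " + groupingId + " -> " + outputRow);
        }
    }
}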
Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
From the class SemanticAnalyzer, method getColForInsertStmtSpec.
private RowResolver getColForInsertStmtSpec(Map<String, ExprNodeDesc> targetCol2Projection, final Table target, Map<String, ColumnInfo> targetCol2ColumnInfo, int colListPos, List<TypeInfo> targetTableColTypes, ArrayList<ExprNodeDesc> new_col_list, List<String> targetTableColNames) throws SemanticException {
RowResolver newOutputRR = new RowResolver();
Map<String, String> colNameToDefaultVal = null;
// see if we need to fetch default constraints from metastore
if (targetCol2Projection.size() < targetTableColNames.size()) {
try {
DefaultConstraint dc = Hive.get().getEnabledDefaultConstraints(target.getDbName(), target.getTableName());
colNameToDefaultVal = dc.getColNameToDefaultValueMap();
} catch (Exception e) {
if (e instanceof SemanticException) {
throw (SemanticException) e;
} else {
throw (new RuntimeException(e));
}
}
}
boolean defaultConstraintsFetch = true;
for (int i = 0; i < targetTableColNames.size(); i++) {
String f = targetTableColNames.get(i);
if (targetCol2Projection.containsKey(f)) {
// put existing column in new list to make sure it is in the right position
new_col_list.add(targetCol2Projection.get(f));
ColumnInfo ci = targetCol2ColumnInfo.get(f);
ci.setInternalName(getColumnInternalName(colListPos));
newOutputRR.put(ci.getTabAlias(), ci.getInternalName(), ci);
} else {
// add new 'synthetic' columns for projections not provided by Select
assert (colNameToDefaultVal != null);
ExprNodeDesc exp = null;
if (colNameToDefaultVal.containsKey(f)) {
// make an expression for default value
String defaultValue = colNameToDefaultVal.get(f);
ParseDriver parseDriver = new ParseDriver();
try {
ASTNode defValAst = parseDriver.parseExpression(defaultValue);
exp = TypeCheckProcFactory.genExprNode(defValAst, new TypeCheckCtx(null)).get(defValAst);
} catch (Exception e) {
throw new SemanticException("Error while parsing default value: " + defaultValue + ". Error message: " + e.getMessage());
}
LOG.debug("Added default value from metastore: " + exp);
} else {
exp = new ExprNodeConstantDesc(targetTableColTypes.get(i), null);
}
new_col_list.add(exp);
// this column doesn't come from any table
final String tableAlias = null;
ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(colListPos), exp.getWritableObjectInspector(), tableAlias, false);
newOutputRR.put(colInfo.getTabAlias(), colInfo.getInternalName(), colInfo);
}
colListPos++;
}
return newOutputRR;
}
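A minimal stand-alone sketch (not the Hive API; names and values are illustrative) of the fill-in policy implemented above: for each target table column, prefer the projection supplied by the SELECT, then a DEFAULT constraint fetched from the metastore, and otherwise a NULL literal.

// Stand-alone sketch of the column fill-in policy (illustrative only, not Hive code).
import java.util.*;

class InsertColumnFillSketch {
    public static void main(String[] args) {
        List<String> targetCols = Arrays.asList("x", "y", "z");
        Map<String, String> projections = new HashMap<>();   // columns supplied by the SELECT
        projections.put("x", "srcCol1");
        Map<String, String> defaults = new HashMap<>();      // DEFAULT constraints from the metastore
        defaults.put("y", "42");

        List<String> filled = new ArrayList<>();
        for (String col : targetCols) {
            if (projections.containsKey(col)) {
                filled.add(projections.get(col));    // keep the projected expression
            } else if (defaults.containsKey(col)) {
                filled.add(defaults.get(col));       // the real code parses this into an expression
            } else {
                filled.add("NULL");                  // synthetic NULL column
            }
        }
        System.out.println(filled);                  // [srcCol1, 42, NULL]
    }
}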
Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
From the class SemanticAnalyzer, method genFileSinkPlan.
@SuppressWarnings("nls")
protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
QBMetaData qbm = qb.getMetaData();
Integer dest_type = qbm.getDestTypeForAlias(dest);
// destination table if any
Table dest_tab = null;
// true for full ACID table and MM table
boolean destTableIsTransactional;
// should the destination table be written to using ACID
boolean destTableIsFullAcid;
boolean destTableIsTemporary = false;
boolean destTableIsMaterialization = false;
// destination partition if any
Partition dest_part = null;
// the intermediate destination directory
Path queryTmpdir = null;
// the final destination directory
Path dest_path = null;
TableDesc table_desc = null;
int currentTableId = 0;
boolean isLocal = false;
SortBucketRSCtx rsCtx = new SortBucketRSCtx();
DynamicPartitionCtx dpCtx = null;
LoadTableDesc ltd = null;
ListBucketingCtx lbCtx = null;
Map<String, String> partSpec = null;
boolean isMmTable = false, isMmCtas = false;
Long writeId = null;
HiveTxnManager txnMgr = SessionState.get().getTxnMgr();
switch(dest_type.intValue()) {
case QBMetaData.DEST_TABLE:
{
dest_tab = qbm.getDestTableForAlias(dest);
destTableIsTransactional = AcidUtils.isTransactionalTable(dest_tab);
destTableIsFullAcid = AcidUtils.isFullAcidTable(dest_tab);
destTableIsTemporary = dest_tab.isTemporary();
// Is the user trying to insert into an external table?
checkExternalTable(dest_tab);
partSpec = qbm.getPartSpecForAlias(dest);
dest_path = dest_tab.getPath();
checkImmutableTable(qb, dest_tab, dest_path, false);
// check for partition
List<FieldSchema> parts = dest_tab.getPartitionKeys();
if (parts != null && parts.size() > 0) {
// table is partitioned
if (partSpec == null || partSpec.size() == 0) {
// user did NOT specify partition
throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.NEED_PARTITION_ERROR.getMsg()));
}
dpCtx = qbm.getDPCtx(dest);
if (dpCtx == null) {
dest_tab.validatePartColumnNames(partSpec, false);
dpCtx = new DynamicPartitionCtx(dest_tab, partSpec, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
qbm.setDPCtx(dest, dpCtx);
}
}
// Check for dynamic partitions.
dpCtx = checkDynPart(qb, qbm, dest_tab, partSpec, dest);
if (dpCtx != null && dpCtx.getSPPath() != null) {
dest_path = new Path(dest_tab.getPath(), dpCtx.getSPPath());
}
boolean isNonNativeTable = dest_tab.isNonNative();
isMmTable = AcidUtils.isInsertOnlyTable(dest_tab.getParameters());
if (isNonNativeTable || isMmTable) {
queryTmpdir = dest_path;
} else {
queryTmpdir = ctx.getTempDirForFinalJobPath(dest_path);
}
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_TABLE specifying " + queryTmpdir + " from " + dest_path);
}
if (dpCtx != null) {
// set the root of the temporary path under which the dynamic partition columns will be populated
dpCtx.setRootPath(queryTmpdir);
}
// this table_desc does not contain the partitioning columns
table_desc = Utilities.getTableDesc(dest_tab);
// Add NOT NULL constraint check
input = genConstraintsPlan(dest, qb, input);
// Add sorting/bucketing if needed
input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
currentTableId = destTableId;
destTableId++;
lbCtx = constructListBucketingCtx(dest_tab.getSkewedColNames(), dest_tab.getSkewedColValues(), dest_tab.getSkewedColValueLocationMaps(), dest_tab.isStoredAsSubDirectories(), conf);
// NOTE: specify Dynamic partitions in dest_tab for WriteEntity
if (!isNonNativeTable) {
AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
if (destTableIsFullAcid) {
acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest);
// todo: should this be done for MM? is it ok to use CombineHiveInputFormat with MM?
checkAcidConstraints(qb, table_desc, dest_tab);
}
try {
if (ctx.getExplainConfig() != null) {
// For explain plan, txn won't be opened and doesn't make sense to allocate write id
writeId = 0L;
} else {
if (isMmTable) {
writeId = txnMgr.getTableWriteId(dest_tab.getDbName(), dest_tab.getTableName());
} else {
writeId = acidOp == Operation.NOT_ACID ? null : txnMgr.getTableWriteId(dest_tab.getDbName(), dest_tab.getTableName());
}
}
} catch (LockException ex) {
throw new SemanticException("Failed to allocate write Id", ex);
}
boolean isReplace = !qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName());
ltd = new LoadTableDesc(queryTmpdir, table_desc, dpCtx, acidOp, isReplace, writeId);
// For Acid table, Insert Overwrite shouldn't replace the table content. We keep the old
// deltas and base and leave them up to the cleaner to clean up
boolean isInsertInto = qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName());
LoadFileType loadType = (!isInsertInto && !destTableIsTransactional) ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING;
ltd.setLoadFileType(loadType);
ltd.setInsertOverwrite(!isInsertInto);
ltd.setLbCtx(lbCtx);
loadTableWork.add(ltd);
} else {
// This is a non-native table.
// We need to set stats as inaccurate.
setStatsForNonNativeTable(dest_tab);
// true if it is insert overwrite.
boolean overwrite = !qb.getParseInfo().isInsertIntoTable(String.format("%s.%s", dest_tab.getDbName(), dest_tab.getTableName()));
createInsertDesc(dest_tab, overwrite);
}
if (dest_tab.isMaterializedView()) {
materializedViewUpdateDesc = new MaterializedViewDesc(dest_tab.getFullyQualifiedName(), false, false, true);
}
WriteEntity output = generateTableWriteEntity(dest, dest_tab, partSpec, ltd, dpCtx, isNonNativeTable);
ctx.getLoadTableOutputMap().put(ltd, output);
break;
}
case QBMetaData.DEST_PARTITION:
{
dest_part = qbm.getDestPartitionForAlias(dest);
dest_tab = dest_part.getTable();
destTableIsTransactional = AcidUtils.isTransactionalTable(dest_tab);
destTableIsFullAcid = AcidUtils.isFullAcidTable(dest_tab);
checkExternalTable(dest_tab);
Path tabPath = dest_tab.getPath();
Path partPath = dest_part.getDataLocation();
checkImmutableTable(qb, dest_tab, partPath, true);
// if the table is in a different dfs than the partition,
// replace the partition's dfs with the table's dfs.
dest_path = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(), partPath.toUri().getPath());
isMmTable = AcidUtils.isInsertOnlyTable(dest_tab.getParameters());
queryTmpdir = isMmTable ? dest_path : ctx.getTempDirForFinalJobPath(dest_path);
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_PARTITION specifying " + queryTmpdir + " from " + dest_path);
}
table_desc = Utilities.getTableDesc(dest_tab);
// Add NOT NULL constraint check
input = genConstraintsPlan(dest, qb, input);
// Add sorting/bucketing if needed
input = genBucketingSortingDest(dest, input, qb, table_desc, dest_tab, rsCtx);
idToTableNameMap.put(String.valueOf(destTableId), dest_tab.getTableName());
currentTableId = destTableId;
destTableId++;
lbCtx = constructListBucketingCtx(dest_part.getSkewedColNames(), dest_part.getSkewedColValues(), dest_part.getSkewedColValueLocationMaps(), dest_part.isStoredAsSubDirectories(), conf);
AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
if (destTableIsFullAcid) {
acidOp = getAcidType(table_desc.getOutputFileFormatClass(), dest);
// todo: should this be done for MM? is it ok to use CombineHiveInputFormat with MM?
checkAcidConstraints(qb, table_desc, dest_tab);
}
try {
if (ctx.getExplainConfig() != null) {
// For explain plan, txn won't be opened and doesn't make sense to allocate write id
writeId = 0L;
} else {
if (isMmTable) {
writeId = txnMgr.getTableWriteId(dest_tab.getDbName(), dest_tab.getTableName());
} else {
writeId = (acidOp == Operation.NOT_ACID) ? null : txnMgr.getTableWriteId(dest_tab.getDbName(), dest_tab.getTableName());
}
}
} catch (LockException ex) {
throw new SemanticException("Failed to allocate write Id", ex);
}
ltd = new LoadTableDesc(queryTmpdir, table_desc, dest_part.getSpec(), acidOp, writeId);
// For Acid table, Insert Overwrite shouldn't replace the table content. We keep the old
// deltas and base and leave them up to the cleaner to clean up
boolean isInsertInto = qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName());
LoadFileType loadType = (!isInsertInto && !destTableIsTransactional) ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING;
ltd.setLoadFileType(loadType);
ltd.setInsertOverwrite(!isInsertInto);
ltd.setLbCtx(lbCtx);
loadTableWork.add(ltd);
if (!outputs.add(new WriteEntity(dest_part, determineWriteType(ltd, dest_tab.isNonNative(), dest)))) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_tab.getTableName() + "@" + dest_part.getName()));
}
break;
}
case QBMetaData.DEST_LOCAL_FILE:
isLocal = true;
// fall through
case QBMetaData.DEST_DFS_FILE:
{
dest_path = new Path(qbm.getDestFileForAlias(dest));
ArrayList<ColumnInfo> colInfos = inputRR.getColumnInfos();
// CTAS case: the file output format and serde are defined by the create
// table command rather than taking the default value
List<FieldSchema> field_schemas = null;
CreateTableDesc tblDesc = qb.getTableDesc();
CreateViewDesc viewDesc = qb.getViewDesc();
boolean isCtas = false;
if (tblDesc != null) {
field_schemas = new ArrayList<FieldSchema>();
destTableIsTemporary = tblDesc.isTemporary();
destTableIsMaterialization = tblDesc.isMaterialization();
if (AcidUtils.isInsertOnlyTable(tblDesc.getTblProps(), true)) {
isMmTable = isMmCtas = true;
try {
if (ctx.getExplainConfig() != null) {
// For explain plan, txn won't be opened and doesn't make sense to allocate write id
writeId = 0L;
} else {
writeId = txnMgr.getTableWriteId(tblDesc.getDatabaseName(), tblDesc.getTableName());
}
} catch (LockException ex) {
throw new SemanticException("Failed to allocate write Id", ex);
}
tblDesc.setInitialMmWriteId(writeId);
}
} else if (viewDesc != null) {
field_schemas = new ArrayList<FieldSchema>();
destTableIsTemporary = false;
}
if (isLocal) {
assert !isMmTable;
// for local directory - we always write to map-red intermediate
// store and then copy to local fs
queryTmpdir = ctx.getMRTmpPath();
} else {
// no copy is required. we may want to revisit this policy in future
try {
Path qPath = FileUtils.makeQualified(dest_path, conf);
queryTmpdir = isMmTable ? qPath : ctx.getTempDirForFinalJobPath(qPath);
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("Setting query directory " + queryTmpdir + " from " + dest_path + " (" + isMmTable + ")");
}
} catch (Exception e) {
throw new SemanticException("Error creating temporary folder on: " + dest_path, e);
}
}
ColsAndTypes ct = deriveFileSinkColTypes(inputRR, field_schemas);
String cols = ct.cols, colTypes = ct.colTypes;
// update the create table descriptor with the resulting schema.
if (tblDesc != null) {
tblDesc.setCols(new ArrayList<FieldSchema>(field_schemas));
} else if (viewDesc != null) {
viewDesc.setSchema(new ArrayList<FieldSchema>(field_schemas));
}
destTableIsTransactional = tblDesc != null && AcidUtils.isTransactionalTable(tblDesc);
destTableIsFullAcid = tblDesc != null && AcidUtils.isFullAcidTable(tblDesc);
boolean isDestTempFile = true;
if (!ctx.isMRTmpFileURI(dest_path.toUri().toString())) {
idToTableNameMap.put(String.valueOf(destTableId), dest_path.toUri().toString());
currentTableId = destTableId;
destTableId++;
isDestTempFile = false;
}
boolean isDfsDir = (dest_type.intValue() == QBMetaData.DEST_DFS_FILE);
// Create LFD even for MM CTAS - it's a no-op move, but it still seems to be used for stats.
loadFileWork.add(new LoadFileDesc(tblDesc, viewDesc, queryTmpdir, dest_path, isDfsDir, cols, colTypes, // there is a change here - prev version had 'transactional', one before 'acid'
destTableIsFullAcid ? Operation.INSERT : Operation.NOT_ACID, isMmCtas));
if (tblDesc == null) {
if (viewDesc != null) {
table_desc = PlanUtils.getTableDesc(viewDesc, cols, colTypes);
} else if (qb.getIsQuery()) {
String fileFormat;
if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()) {
fileFormat = "SequenceFile";
HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT, fileFormat);
table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, ThriftJDBCBinarySerDe.class);
// Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
// write out formatted thrift objects to SequenceFile
conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
} else {
fileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
Class<? extends Deserializer> serdeClass = LazySimpleSerDe.class;
if (fileFormat.equals(PlanUtils.LLAP_OUTPUT_FORMAT_KEY)) {
serdeClass = LazyBinarySerDe2.class;
}
table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, serdeClass);
}
} else {
table_desc = PlanUtils.getDefaultTableDesc(qb.getDirectoryDesc(), cols, colTypes);
}
} else {
table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
}
if (!outputs.add(new WriteEntity(dest_path, !isDfsDir, isDestTempFile))) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(dest_path.toUri().toString()));
}
break;
}
default:
throw new SemanticException("Unknown destination type: " + dest_type);
}
if (!(dest_type.intValue() == QBMetaData.DEST_DFS_FILE && qb.getIsQuery())) {
input = genConversionSelectOperator(dest, qb, input, table_desc, dpCtx);
}
inputRR = opParseCtx.get(input).getRowResolver();
ArrayList<ColumnInfo> vecCol = new ArrayList<ColumnInfo>();
if (updating(dest) || deleting(dest)) {
vecCol.add(new ColumnInfo(VirtualColumn.ROWID.getName(), VirtualColumn.ROWID.getTypeInfo(), "", true));
} else {
try {
StructObjectInspector rowObjectInspector = (StructObjectInspector) table_desc.getDeserializer(conf).getObjectInspector();
List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
for (int i = 0; i < fields.size(); i++) {
vecCol.add(new ColumnInfo(fields.get(i).getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()), "", false));
}
} catch (Exception e) {
throw new SemanticException(e.getMessage(), e);
}
}
RowSchema fsRS = new RowSchema(vecCol);
// The output files of a FileSink can be merged if they are either not being written to a table
// or are being written to a table which is not bucketed
// and the table is not sorted
boolean canBeMerged = (dest_tab == null || !((dest_tab.getNumBuckets() > 0) || (dest_tab.getSortCols() != null && dest_tab.getSortCols().size() > 0)));
// If this table is working with ACID semantics, turn off merging
canBeMerged &= !destTableIsFullAcid;
// Generate the partition columns from the parent input
if (dest_type.intValue() == QBMetaData.DEST_TABLE || dest_type.intValue() == QBMetaData.DEST_PARTITION) {
genPartnCols(dest, input, qb, table_desc, dest_tab, rsCtx);
}
FileSinkDesc fileSinkDesc = createFileSinkDesc(dest, table_desc, dest_part, // this was 1/4 acid
dest_path, // this was 1/4 acid
currentTableId, // this was 1/4 acid
destTableIsFullAcid, // this was 1/4 acid
destTableIsTemporary, destTableIsMaterialization, queryTmpdir, rsCtx, dpCtx, lbCtx, fsRS, canBeMerged, dest_tab, writeId, isMmCtas, dest_type, qb);
if (isMmCtas) {
// Add FSD so that the LoadTask compilation could fix up its path to avoid the move.
tableDesc.setWriter(fileSinkDesc);
}
if (fileSinkDesc.getInsertOverwrite()) {
if (ltd != null) {
ltd.setInsertOverwrite(true);
}
}
if (SessionState.get().isHiveServerQuery() && null != table_desc && table_desc.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(true);
} else {
fileSinkDesc.setIsUsingThriftJDBCBinarySerDe(false);
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(fileSinkDesc, fsRS, input), inputRR);
handleLineage(ltd, output);
if (LOG.isDebugEnabled()) {
LOG.debug("Created FileSink Plan for clause: " + dest + "dest_path: " + dest_path + " row schema: " + inputRR.toString());
}
FileSinkOperator fso = (FileSinkOperator) output;
fso.getConf().setTable(dest_tab);
// collect column stats when stats autogathering is enabled and it is an insert overwrite or insert into table
if (dest_tab != null && !dest_tab.isNonNative() && conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) {
if (dest_type.intValue() == QBMetaData.DEST_TABLE) {
genAutoColumnStatsGatheringPipeline(qb, table_desc, partSpec, input, qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
} else if (dest_type.intValue() == QBMetaData.DEST_PARTITION) {
genAutoColumnStatsGatheringPipeline(qb, table_desc, dest_part.getSpec(), input, qb.getParseInfo().isInsertIntoTable(dest_tab.getDbName(), dest_tab.getTableName()));
}
}
return output;
}
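The load-file-type decision appears in both the DEST_TABLE and DEST_PARTITION branches above. A stand-alone sketch of that rule (plain strings standing in for Hive's LoadFileType enum): only a non-transactional INSERT OVERWRITE replaces existing files, while transactional tables keep old deltas and base files for the cleaner.

// Stand-alone sketch of the load-type rule (illustrative only, not Hive code).
class LoadTypeDecisionSketch {
    static String loadType(boolean isInsertInto, boolean destTableIsTransactional) {
        // only a non-transactional INSERT OVERWRITE replaces existing files
        return (!isInsertInto && !destTableIsTransactional) ? "REPLACE_ALL" : "KEEP_EXISTING";
    }
    public static void main(String[] args) {
        System.out.println(loadType(false, false)); // INSERT OVERWRITE, plain table   -> REPLACE_ALL
        System.out.println(loadType(false, true));  // INSERT OVERWRITE, ACID/MM table -> KEEP_EXISTING
        System.out.println(loadType(true, false));  // INSERT INTO                     -> KEEP_EXISTING
    }
}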
Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
From the class SemanticAnalyzer, method genJoinReduceSinkChild.
@SuppressWarnings("nls")
private Operator genJoinReduceSinkChild(QB qb, ExprNodeDesc[] joinKeys, Operator<?> child, String[] srcs, int tag) throws SemanticException {
// dummy for backtracking
Operator dummy = Operator.createDummy();
dummy.setParentOperators(Arrays.asList(child));
RowResolver inputRR = opParseCtx.get(child).getRowResolver();
RowResolver outputRR = new RowResolver();
ArrayList<String> outputColumns = new ArrayList<String>();
ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> reduceKeysBack = new ArrayList<ExprNodeDesc>();
// Compute join keys and store in reduceKeys
for (ExprNodeDesc joinKey : joinKeys) {
reduceKeys.add(joinKey);
reduceKeysBack.add(ExprNodeDescUtils.backtrack(joinKey, dummy, child));
}
// Walk over the input row resolver and copy in the output
ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
ArrayList<ExprNodeDesc> reduceValuesBack = new ArrayList<ExprNodeDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
List<ColumnInfo> columns = inputRR.getColumnInfos();
int[] index = new int[columns.size()];
for (int i = 0; i < columns.size(); i++) {
ColumnInfo colInfo = columns.get(i);
String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
ExprNodeDesc expr = new ExprNodeColumnDesc(colInfo);
// backtrack can be null when input is script operator
ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, child);
int kindex;
if (exprBack == null) {
kindex = -1;
} else if (ExprNodeDescUtils.isConstant(exprBack)) {
kindex = reduceKeysBack.indexOf(exprBack);
} else {
kindex = ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack);
}
if (kindex >= 0) {
ColumnInfo newColInfo = new ColumnInfo(colInfo);
newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
newColInfo.setTabAlias(nm[0]);
outputRR.put(nm[0], nm[1], newColInfo);
if (nm2 != null) {
outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
}
index[i] = kindex;
continue;
}
index[i] = -reduceValues.size() - 1;
String outputColName = getColumnInternalName(reduceValues.size());
reduceValues.add(expr);
reduceValuesBack.add(exprBack);
ColumnInfo newColInfo = new ColumnInfo(colInfo);
newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
newColInfo.setTabAlias(nm[0]);
outputRR.put(nm[0], nm[1], newColInfo);
if (nm2 != null) {
outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
}
outputColumns.add(outputColName);
}
dummy.setParentOperators(null);
int numReds = -1;
// Use only 1 reducer in case of cartesian product
if (reduceKeys.size() == 0) {
numReds = 1;
String error = StrictChecks.checkCartesian(conf);
if (error != null) {
throw new SemanticException(error);
}
}
ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumns, false, tag, reduceKeys.size(), numReds, AcidUtils.Operation.NOT_ACID);
ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(outputRR.getColumnInfos()), child), outputRR);
List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
for (int i = 0; i < keyColNames.size(); i++) {
colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), reduceKeys.get(i));
}
List<String> valColNames = rsDesc.getOutputValueColumnNames();
for (int i = 0; i < valColNames.size(); i++) {
colExprMap.put(Utilities.ReduceField.VALUE + "." + valColNames.get(i), reduceValues.get(i));
}
rsOp.setValueIndex(index);
rsOp.setColumnExprMap(colExprMap);
rsOp.setInputAliases(srcs);
return rsOp;
}
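A stand-alone sketch (illustrative only) of the index[] encoding built above: a non-negative entry maps an input column to a reduce KEY column, and a negative entry encodes the reduce VALUE position as -(position + 1).

// Stand-alone sketch of the value-index encoding (not Hive code; the array is an example).
class ReduceSinkIndexSketch {
    public static void main(String[] args) {
        int[] index = {0, -1, 1, -2};   // hypothetical layout for four input columns
        for (int i = 0; i < index.length; i++) {
            if (index[i] >= 0) {
                System.out.println("col " + i + " -> KEY.reducesinkkey" + index[i]);
            } else {
                System.out.println("col " + i + " -> VALUE column " + (-index[i] - 1));
            }
        }
    }
}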
Use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
From the class SemanticAnalyzer, method handleInsertStatementSpec.
/**
* This modifies the Select projections when the Select is part of an insert statement and
* the insert statement specifies a column list for the target table, e.g.
* create table source (a int, b int);
* create table target (x int, y int, z int);
* insert into target(z,x) select * from source
*
* Once the * is resolved to 'a,b', this list needs to rewritten to 'b,null,a' so that it looks
* as if the original query was written as
* insert into target select b, null, a from source
*
* If the target schema is not specified, this is a no-op.
*
* @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx)
* @throws SemanticException
*/
public RowResolver handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest, RowResolver outputRR, RowResolver inputRR, QB qb, ASTNode selExprList) throws SemanticException {
// (z,x)
// specified in the query
List<String> targetTableSchema = qb.getParseInfo().getDestSchemaForClause(dest);
if (targetTableSchema == null) {
// no insert schema was specified
return outputRR;
}
if (targetTableSchema.size() != col_list.size()) {
Table target = qb.getMetaData().getDestTableForAlias(dest);
Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
throw new SemanticException(generateErrorMessage(selExprList, "Expected " + targetTableSchema.size() + " columns for " + dest + (target != null ? "/" + target.getCompleteName() : (partition != null ? "/" + partition.getCompleteName() : "")) + "; select produces " + col_list.size() + " columns"));
}
// e.g. map z->expr for a
Map<String, ExprNodeDesc> targetCol2Projection = new HashMap<String, ExprNodeDesc>();
// e.g. map z->ColumnInfo for a
Map<String, ColumnInfo> targetCol2ColumnInfo = new HashMap<String, ColumnInfo>();
int colListPos = 0;
for (String targetCol : targetTableSchema) {
targetCol2ColumnInfo.put(targetCol, outputRR.getColumnInfos().get(colListPos));
targetCol2Projection.put(targetCol, col_list.get(colListPos++));
}
Table target = qb.getMetaData().getDestTableForAlias(dest);
Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null;
if (target == null && partition == null) {
throw new SemanticException(generateErrorMessage(selExprList, "No table/partition found in QB metadata for dest='" + dest + "'"));
}
ArrayList<ExprNodeDesc> new_col_list = new ArrayList<ExprNodeDesc>();
colListPos = 0;
List<FieldSchema> targetTableCols = target != null ? target.getCols() : partition.getCols();
List<String> targetTableColNames = new ArrayList<String>();
List<TypeInfo> targetTableColTypes = new ArrayList<TypeInfo>();
for (FieldSchema fs : targetTableCols) {
targetTableColNames.add(fs.getName());
targetTableColTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()));
}
Map<String, String> partSpec = qb.getMetaData().getPartSpecForAlias(dest);
if (partSpec != null) {
// relies on consistent order via LinkedHashMap
for (Map.Entry<String, String> partKeyVal : partSpec.entrySet()) {
if (partKeyVal.getValue() == null) {
// these must be after non-partition cols
targetTableColNames.add(partKeyVal.getKey());
targetTableColTypes.add(TypeInfoFactory.stringTypeInfo);
}
}
}
// now make the select produce <regular columns>,<dynamic partition columns>,
// where missing columns are NULL-filled
Table tbl = target == null ? partition.getTable() : target;
RowResolver newOutputRR = getColForInsertStmtSpec(targetCol2Projection, tbl, targetCol2ColumnInfo, colListPos, targetTableColTypes, new_col_list, targetTableColNames);
col_list.clear();
col_list.addAll(new_col_list);
return newOutputRR;
}
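A runnable, self-contained sketch (plain strings standing in for ExprNodeDesc; not Hive code) of the rewrite described in the javadoc: with insert into target(z,x) select a,b from source and target columns (x, y, z), the select list is reordered to b, null, a.

// Stand-alone sketch of the insert column-list reordering (illustrative only).
import java.util.*;

class InsertReorderSketch {
    public static void main(String[] args) {
        List<String> targetSchema = Arrays.asList("z", "x");   // columns named in INSERT
        List<String> selectExprs  = Arrays.asList("a", "b");   // resolved SELECT list
        List<String> tableCols    = Arrays.asList("x", "y", "z");

        // e.g. map z -> a, x -> b
        Map<String, String> targetCol2Projection = new HashMap<>();
        for (int i = 0; i < targetSchema.size(); i++) {
            targetCol2Projection.put(targetSchema.get(i), selectExprs.get(i));
        }
        // walk the table columns in order, filling gaps with NULL
        List<String> rewritten = new ArrayList<>();
        for (String col : tableCols) {
            rewritten.add(targetCol2Projection.getOrDefault(col, "null"));
        }
        System.out.println(rewritten);   // [b, null, a]
    }
}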