Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.
The class HiveUnionVisitor, method genInputSelectForUnion: builds a SELECT over one branch of a UNION that renames the branch's columns to the union schema and casts them where the types differ; if every column already matches, the original operator is returned unchanged.
private Operator<? extends OperatorDesc> genInputSelectForUnion(Operator<? extends OperatorDesc> origInputOp, ArrayList<ColumnInfo> uColumnInfo) throws SemanticException {
  Iterator<ColumnInfo> oIter = origInputOp.getSchema().getSignature().iterator();
  Iterator<ColumnInfo> uIter = uColumnInfo.iterator();
  List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
  List<String> colName = new ArrayList<String>();
  Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
  boolean needSelectOp = false;
  while (oIter.hasNext()) {
    ColumnInfo oInfo = oIter.next();
    ColumnInfo uInfo = uIter.next();
    if (!oInfo.isSameColumnForRR(uInfo)) {
      needSelectOp = true;
    }
    ExprNodeDesc column = new ExprNodeColumnDesc(oInfo.getType(), oInfo.getInternalName(), oInfo.getTabAlias(), oInfo.getIsVirtualCol(), oInfo.isSkewedCol());
    if (!oInfo.getType().equals(uInfo.getType())) {
      column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().createConversionCast(column, (PrimitiveTypeInfo) uInfo.getType());
    }
    columns.add(column);
    colName.add(uInfo.getInternalName());
    columnExprMap.put(uInfo.getInternalName(), column);
  }
  if (needSelectOp) {
    return OperatorFactory.getAndMakeChild(new SelectDesc(columns, colName), new RowSchema(uColumnInfo), columnExprMap, origInputOp);
  } else {
    return origInputOp;
  }
}
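All of the snippets on this page follow the same basic recipe: build a list of ExprNodeDesc projections, a parallel list of output column names, and a name-to-expression map, then wrap the first two in a SelectDesc and attach the new SELECT to its parent with OperatorFactory.getAndMakeChild. The sketch below distills that recipe from the method above; it is illustrative only, the helper class and variable names (SelectDescSketch, parentOp, targetInfos) are hypothetical, and it assumes targetInfos has one entry per parent column.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;

final class SelectDescSketch {
  // Re-emits every column of parentOp under the internal names given by targetInfos
  // and attaches the resulting SELECT as a child of parentOp.
  static Operator<? extends OperatorDesc> projectAs(Operator<? extends OperatorDesc> parentOp,
      ArrayList<ColumnInfo> targetInfos) {
    List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
    List<String> colNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
    List<ColumnInfo> parentCols = parentOp.getSchema().getSignature();
    for (int i = 0; i < parentCols.size(); i++) {
      ExprNodeDesc column = new ExprNodeColumnDesc(parentCols.get(i)); // reference the parent column as-is
      String outName = targetInfos.get(i).getInternalName();           // rename to the target schema's name
      columns.add(column);
      colNames.add(outName);
      columnExprMap.put(outName, column);
    }
    // SelectDesc pairs the expressions with their output names positionally;
    // the RowSchema describes the shape of the rows the new SELECT produces.
    return OperatorFactory.getAndMakeChild(
        new SelectDesc(columns, colNames), new RowSchema(targetInfos), columnExprMap, parentOp);
  }
}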
Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.
The class HiveOpConverterUtils, method genReduceSinkAndBacktrackSelect: generates a ReduceSink operator and, on top of it, a SELECT that backtracks the requested columns (keepColNames) from the ReduceSink's key and value outputs.
static SelectOperator genReduceSinkAndBacktrackSelect(Operator<?> input, ExprNodeDesc[] keys, int tag, ArrayList<ExprNodeDesc> partitionCols, String order, String nullOrder, int numReducers, Operation acidOperation, HiveConf hiveConf, List<String> keepColNames) throws SemanticException {
  // 1. Generate RS operator
  // 1.1 Prune the table names: only count those that are not empty strings,
  // since an empty string table alias is only allowed for virtual columns.
  String tableAlias = null;
  Set<String> tableNames = input.getSchema().getTableNames();
  for (String tableName : tableNames) {
    if (tableName != null) {
      if (tableName.length() == 0) {
        if (tableAlias == null) {
          tableAlias = tableName;
        }
      } else {
        if (tableAlias == null || tableAlias.length() == 0) {
          tableAlias = tableName;
        } else {
          if (!tableName.equals(tableAlias)) {
            throw new SemanticException("In CBO return path, genReduceSinkAndBacktrackSelect is expecting only " + "one tableAlias but there is more than one");
          }
        }
      }
    }
  }
  if (tableAlias == null) {
    throw new SemanticException("In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is none");
  }
  // 1.2 Now generate RS operator
  ReduceSinkOperator rsOp = genReduceSink(input, tableAlias, keys, tag, partitionCols, order, nullOrder, numReducers, acidOperation, hiveConf);
  // 2. Generate backtrack Select operator
  Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSink(keepColNames, rsOp.getConf().getOutputKeyColumnNames(), rsOp.getConf().getOutputValueColumnNames(), rsOp.getValueIndex(), input);
  SelectDesc selectDesc = new SelectDesc(new ArrayList<ExprNodeDesc>(descriptors.values()), new ArrayList<String>(descriptors.keySet()));
  ArrayList<ColumnInfo> cinfoLst = createColInfosSubset(input, keepColNames);
  SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(selectDesc, new RowSchema(cinfoLst), rsOp);
  selectOp.setColumnExprMap(descriptors);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Generated " + selectOp + " with row schema: [" + selectOp.getSchema() + "]");
  }
  return selectOp;
}
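A note on the SelectDesc built here: both constructor arguments are drawn from the same descriptor map returned by buildBacktrackFromReduceSink, so each output name (a key of the map, taken from keepColNames) is paired with the expression that backtracks it through the ReduceSink's key/value output columns; the same map is then installed as the SELECT's column expression map via setColumnExprMap.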
Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.
The class SemanticAnalyzer, method genFileSinkPlan: builds the FileSink plan for an insert destination (table, partition, local or DFS directory); SelectDesc appears in the partitioned CTAS/materialized-view branch, where columns are reordered before the sink.
@SuppressWarnings("nls")
protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
QBMetaData qbm = qb.getMetaData();
Integer destType = qbm.getDestTypeForAlias(dest);
// destination table if any
Table destinationTable = null;
// true for full ACID table and MM table
boolean destTableIsTransactional;
// should the destination table be written to using ACID
boolean destTableIsFullAcid;
// should we add files directly to the final path
boolean isDirectInsert = false;
AcidUtils.Operation acidOperation = null;
boolean destTableIsTemporary = false;
boolean destTableIsMaterialization = false;
// destination partition if any
Partition destinationPartition = null;
// the intermediate destination directory
Path queryTmpdir = null;
String moveTaskId = null;
// the final destination directory
Path destinationPath = null;
TableDesc tableDescriptor = null;
StructObjectInspector specificRowObjectInspector = null;
int currentTableId = 0;
boolean isLocal = false;
SortBucketRSCtx rsCtx = new SortBucketRSCtx();
DynamicPartitionCtx dpCtx = null;
LoadTableDesc ltd = null;
ListBucketingCtx lbCtx = null;
Map<String, String> partSpec = null;
boolean isMmTable = false, isMmCreate = false, isNonNativeTable = false;
Long writeId = null;
HiveTxnManager txnMgr = getTxnMgr();
switch(destType.intValue()) {
case QBMetaData.DEST_TABLE:
{
destinationTable = qbm.getDestTableForAlias(dest);
destTableIsTransactional = AcidUtils.isTransactionalTable(destinationTable);
destTableIsFullAcid = AcidUtils.isFullAcidTable(destinationTable);
destTableIsTemporary = destinationTable.isTemporary();
// Is the user trying to insert into an external table?
checkExternalTable(destinationTable);
partSpec = qbm.getPartSpecForAlias(dest);
destinationPath = destinationTable.getPath();
checkImmutableTable(qb, destinationTable, destinationPath, false);
// check for partition
List<FieldSchema> parts = destinationTable.getPartitionKeys();
if (parts != null && parts.size() > 0) {
// table is partitioned
if (partSpec == null || partSpec.size() == 0) {
// user did NOT specify partition
throw new SemanticException(generateErrorMessage(qb.getParseInfo().getDestForClause(dest), ErrorMsg.NEED_PARTITION_ERROR.getMsg()));
}
dpCtx = qbm.getDPCtx(dest);
if (dpCtx == null) {
destinationTable.validatePartColumnNames(partSpec, false);
dpCtx = new DynamicPartitionCtx(partSpec, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
qbm.setDPCtx(dest, dpCtx);
}
}
// Check for dynamic partitions.
dpCtx = checkDynPart(qb, qbm, destinationTable, partSpec, dest);
if (dpCtx != null && dpCtx.getSPPath() != null) {
destinationPath = new Path(destinationTable.getPath(), dpCtx.getSPPath());
}
isNonNativeTable = destinationTable.isNonNative();
isMmTable = AcidUtils.isInsertOnlyTable(destinationTable.getParameters());
AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
// this table_desc does not contain the partitioning columns
tableDescriptor = Utilities.getTableDesc(destinationTable);
if (!isNonNativeTable) {
if (destTableIsTransactional) {
acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable);
}
}
isDirectInsert = isDirectInsert(destTableIsFullAcid, acidOp);
acidOperation = acidOp;
queryTmpdir = getTmpDir(isNonNativeTable, isMmTable, isDirectInsert, destinationPath);
moveTaskId = getMoveTaskId();
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_TABLE specifying " + queryTmpdir + " from " + destinationPath);
}
if (dpCtx != null) {
// set the root of the temporary path where the dynamic partition columns will be populated
dpCtx.setRootPath(queryTmpdir);
}
// Add NOT NULL constraint check
input = genConstraintsPlan(dest, qb, input);
if (!qb.getIsQuery()) {
input = genConversionSelectOperator(dest, qb, input, destinationTable.getDeserializer(), dpCtx, parts);
}
if (destinationTable.isMaterializedView() && mvRebuildMode == MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD) {
// Data organization (DISTRIBUTED, SORTED, CLUSTERED) for materialized view
// TODO: We only do this for a full rebuild
String sortColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_SORT_COLUMNS);
String distributeColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_DISTRIBUTE_COLUMNS);
if (sortColsStr != null || distributeColsStr != null) {
input = genMaterializedViewDataOrgPlan(destinationTable, sortColsStr, distributeColsStr, inputRR, input);
}
} else {
// Add sorting/bucketing if needed
input = genBucketingSortingDest(dest, input, qb, tableDescriptor, destinationTable, rsCtx);
}
idToTableNameMap.put(String.valueOf(destTableId), destinationTable.getTableName());
currentTableId = destTableId;
destTableId++;
// NOTE: specify Dynamic partitions in dest_tab for WriteEntity
if (!isNonNativeTable || destinationTable.getStorageHandler().commitInMoveTask()) {
if (destTableIsTransactional) {
acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable);
checkAcidConstraints();
} else {
lbCtx = constructListBucketingCtx(destinationTable.getSkewedColNames(), destinationTable.getSkewedColValues(), destinationTable.getSkewedColValueLocationMaps(), destinationTable.isStoredAsSubDirectories());
}
try {
if (ctx.getExplainConfig() != null) {
// For an explain plan, the txn won't be opened, so it doesn't make sense to allocate a write id
writeId = null;
} else {
if (isMmTable) {
writeId = txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName());
} else {
writeId = acidOp == Operation.NOT_ACID ? null : txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName());
}
}
} catch (LockException ex) {
throw new SemanticException("Failed to allocate write Id", ex);
}
boolean isReplace = !qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName());
ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, dpCtx, acidOp, isReplace, writeId);
if (writeId != null) {
ltd.setStmtId(txnMgr.getCurrentStmtId());
}
ltd.setMoveTaskId(moveTaskId);
// For Acid table, Insert Overwrite shouldn't replace the table content. We keep the old
// deltas and base and leave them up to the cleaner to clean up
boolean isInsertInto = qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName());
LoadFileType loadType;
if (isDirectInsert) {
loadType = LoadFileType.IGNORE;
} else if (!isInsertInto && !destTableIsTransactional) {
loadType = LoadFileType.REPLACE_ALL;
} else {
loadType = LoadFileType.KEEP_EXISTING;
}
ltd.setLoadFileType(loadType);
ltd.setInsertOverwrite(!isInsertInto);
ltd.setIsDirectInsert(isDirectInsert);
ltd.setLbCtx(lbCtx);
loadTableWork.add(ltd);
} else {
// This is a non-native table.
// We need to set stats as inaccurate.
setStatsForNonNativeTable(destinationTable.getDbName(), destinationTable.getTableName());
// true if it is insert overwrite.
boolean overwrite = !qb.getParseInfo().isInsertIntoTable(String.format("%s.%s", destinationTable.getDbName(), destinationTable.getTableName()));
createPreInsertDesc(destinationTable, overwrite);
ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, partSpec == null ? ImmutableMap.of() : partSpec);
ltd.setInsertOverwrite(overwrite);
ltd.setLoadFileType(overwrite ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING);
}
if (destinationTable.isMaterializedView()) {
materializedViewUpdateDesc = new MaterializedViewUpdateDesc(destinationTable.getFullyQualifiedName(), false, false, true);
}
WriteEntity output = generateTableWriteEntity(dest, destinationTable, partSpec, ltd, dpCtx);
ctx.getLoadTableOutputMap().put(ltd, output);
break;
}
case QBMetaData.DEST_PARTITION:
{
destinationPartition = qbm.getDestPartitionForAlias(dest);
destinationTable = destinationPartition.getTable();
destTableIsTransactional = AcidUtils.isTransactionalTable(destinationTable);
destTableIsFullAcid = AcidUtils.isFullAcidTable(destinationTable);
checkExternalTable(destinationTable);
Path partPath = destinationPartition.getDataLocation();
checkImmutableTable(qb, destinationTable, partPath, true);
// Previous behavior (HIVE-1707) used to replace the partition's dfs with the table's dfs.
// The changes in HIVE-19891 appear to no longer support that behavior.
destinationPath = partPath;
if (MetaStoreUtils.isArchived(destinationPartition.getTPartition())) {
try {
String conflictingArchive = ArchiveUtils.conflictingArchiveNameOrNull(db, destinationTable, destinationPartition.getSpec());
String message = String.format("Insert conflict with existing archive: %s", conflictingArchive);
throw new SemanticException(message);
} catch (SemanticException err) {
throw err;
} catch (HiveException err) {
throw new SemanticException(err);
}
}
isNonNativeTable = destinationTable.isNonNative();
isMmTable = AcidUtils.isInsertOnlyTable(destinationTable.getParameters());
AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
// this table_desc does not contain the partitioning columns
tableDescriptor = Utilities.getTableDesc(destinationTable);
if (!isNonNativeTable) {
if (destTableIsTransactional) {
acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable);
}
}
isDirectInsert = isDirectInsert(destTableIsFullAcid, acidOp);
acidOperation = acidOp;
queryTmpdir = getTmpDir(isNonNativeTable, isMmTable, isDirectInsert, destinationPath);
moveTaskId = getMoveTaskId();
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("create filesink w/DEST_PARTITION specifying " + queryTmpdir + " from " + destinationPath);
}
// Add NOT NULL constraint check
input = genConstraintsPlan(dest, qb, input);
if (!qb.getIsQuery()) {
input = genConversionSelectOperator(dest, qb, input, destinationTable.getDeserializer(), dpCtx, null);
}
if (destinationTable.isMaterializedView() && mvRebuildMode == MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD) {
// Data organization (DISTRIBUTED, SORTED, CLUSTERED) for materialized view
// TODO: We only do this for a full rebuild
String sortColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_SORT_COLUMNS);
String distributeColsStr = destinationTable.getProperty(Constants.MATERIALIZED_VIEW_DISTRIBUTE_COLUMNS);
if (sortColsStr != null || distributeColsStr != null) {
input = genMaterializedViewDataOrgPlan(destinationTable, sortColsStr, distributeColsStr, inputRR, input);
}
} else {
// Add sorting/bucketing if needed
input = genBucketingSortingDest(dest, input, qb, tableDescriptor, destinationTable, rsCtx);
}
idToTableNameMap.put(String.valueOf(destTableId), destinationTable.getTableName());
currentTableId = destTableId;
destTableId++;
if (destTableIsTransactional) {
acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable);
checkAcidConstraints();
} else {
// Transactional tables can't be list bucketed or have skewed cols
lbCtx = constructListBucketingCtx(destinationPartition.getSkewedColNames(), destinationPartition.getSkewedColValues(), destinationPartition.getSkewedColValueLocationMaps(), destinationPartition.isStoredAsSubDirectories());
}
try {
if (ctx.getExplainConfig() != null) {
// For an explain plan, the txn won't be opened, so it doesn't make sense to allocate a write id
writeId = null;
} else {
if (isMmTable) {
writeId = txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName());
} else {
writeId = (acidOp == Operation.NOT_ACID) ? null : txnMgr.getTableWriteId(destinationTable.getDbName(), destinationTable.getTableName());
}
}
} catch (LockException ex) {
throw new SemanticException("Failed to allocate write Id", ex);
}
ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, destinationPartition.getSpec(), acidOp, writeId);
if (writeId != null) {
ltd.setStmtId(txnMgr.getCurrentStmtId());
}
// In the current context for generating the File Sink Operator, the statement is either
// INSERT INTO or INSERT OVERWRITE, so the next line works.
boolean isInsertInto = !qb.getParseInfo().isDestToOpTypeInsertOverwrite(dest);
// For Acid table, Insert Overwrite shouldn't replace the table content. We keep the old
// deltas and base and leave them up to the cleaner to clean up
LoadFileType loadType;
if (isDirectInsert) {
loadType = LoadFileType.IGNORE;
} else if (!isInsertInto && !destTableIsTransactional) {
loadType = LoadFileType.REPLACE_ALL;
} else {
loadType = LoadFileType.KEEP_EXISTING;
}
ltd.setLoadFileType(loadType);
ltd.setInsertOverwrite(!isInsertInto);
ltd.setIsDirectInsert(isDirectInsert);
ltd.setLbCtx(lbCtx);
ltd.setMoveTaskId(moveTaskId);
loadTableWork.add(ltd);
if (!outputs.add(new WriteEntity(destinationPartition, determineWriteType(ltd, dest)))) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(destinationTable.getTableName() + "@" + destinationPartition.getName()));
}
break;
}
case QBMetaData.DEST_LOCAL_FILE:
isLocal = true;
// fall through
case QBMetaData.DEST_DFS_FILE:
{
destinationPath = getDestinationFilePath(qbm.getDestFileForAlias(dest), isMmTable);
// CTAS case: the file output format and serde are defined by the create
// table command rather than taking the default value
List<FieldSchema> fieldSchemas = null;
List<FieldSchema> partitionColumns = null;
List<String> partitionColumnNames = null;
List<FieldSchema> sortColumns = null;
List<String> sortColumnNames = null;
List<FieldSchema> distributeColumns = null;
List<String> distributeColumnNames = null;
List<ColumnInfo> fileSinkColInfos = null;
List<ColumnInfo> sortColInfos = null;
List<ColumnInfo> distributeColInfos = null;
TableName tableName = null;
Map<String, String> tblProps = null;
CreateTableDesc tblDesc = qb.getTableDesc();
CreateMaterializedViewDesc viewDesc = qb.getViewDesc();
if (tblDesc != null) {
fieldSchemas = new ArrayList<>();
partitionColumns = new ArrayList<>();
partitionColumnNames = tblDesc.getPartColNames();
fileSinkColInfos = new ArrayList<>();
destTableIsTemporary = tblDesc.isTemporary();
destTableIsMaterialization = tblDesc.isMaterialization();
tableName = TableName.fromString(tblDesc.getDbTableName(), null, tblDesc.getDatabaseName());
tblProps = tblDesc.getTblProps();
} else if (viewDesc != null) {
fieldSchemas = new ArrayList<>();
partitionColumns = new ArrayList<>();
partitionColumnNames = viewDesc.getPartColNames();
sortColumns = new ArrayList<>();
sortColumnNames = viewDesc.getSortColNames();
distributeColumns = new ArrayList<>();
distributeColumnNames = viewDesc.getDistributeColNames();
fileSinkColInfos = new ArrayList<>();
sortColInfos = new ArrayList<>();
distributeColInfos = new ArrayList<>();
destTableIsTemporary = false;
destTableIsMaterialization = false;
tableName = HiveTableName.ofNullableWithNoDefault(viewDesc.getViewName());
tblProps = viewDesc.getTblProps();
}
destTableIsTransactional = tblProps != null && AcidUtils.isTablePropertyTransactional(tblProps);
if (destTableIsTransactional) {
try {
if (ctx.getExplainConfig() != null) {
// For an explain plan, the txn won't be opened, so it doesn't make sense to allocate a write id
writeId = 0L;
} else {
writeId = txnMgr.getTableWriteId(tableName.getDb(), tableName.getTable());
}
} catch (LockException ex) {
throw new SemanticException("Failed to allocate write Id", ex);
}
if (AcidUtils.isInsertOnlyTable(tblProps, true)) {
isMmTable = isMmCreate = true;
if (tblDesc != null) {
tblDesc.setInitialMmWriteId(writeId);
} else {
viewDesc.setInitialMmWriteId(writeId);
}
}
}
if (isLocal) {
assert !isMmTable;
// for local directory - we always write to map-red intermediate
// store and then copy to local fs
queryTmpdir = ctx.getMRTmpPath();
} else {
// no copy is required; we may want to revisit this policy in the future
try {
Path qPath = FileUtils.makeQualified(destinationPath, conf);
queryTmpdir = isMmTable ? qPath : ctx.getTempDirForFinalJobPath(qPath);
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("Setting query directory " + queryTmpdir + " from " + destinationPath + " (" + isMmTable + ")");
}
} catch (Exception e) {
throw new SemanticException("Error creating temporary folder on: " + destinationPath, e);
}
}
// Check for dynamic partitions.
final String cols, colTypes;
final boolean isPartitioned;
if (dpCtx != null) {
throw new SemanticException("Dynamic partition context has already been created, this should not happen");
}
if (!CollectionUtils.isEmpty(partitionColumnNames)) {
ColsAndTypes ct = deriveFileSinkColTypes(inputRR, partitionColumnNames, sortColumnNames, distributeColumnNames, fieldSchemas, partitionColumns, sortColumns, distributeColumns, fileSinkColInfos, sortColInfos, distributeColInfos);
cols = ct.cols;
colTypes = ct.colTypes;
dpCtx = new DynamicPartitionCtx(partitionColumnNames, conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME), conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
qbm.setDPCtx(dest, dpCtx);
// set the root of the temporary path where the dynamic partition columns will be populated
dpCtx.setRootPath(queryTmpdir);
isPartitioned = true;
} else {
ColsAndTypes ct = deriveFileSinkColTypes(inputRR, sortColumnNames, distributeColumnNames, fieldSchemas, sortColumns, distributeColumns, sortColInfos, distributeColInfos);
cols = ct.cols;
colTypes = ct.colTypes;
isPartitioned = false;
}
// update the create table descriptor with the resulting schema.
if (tblDesc != null) {
tblDesc.setCols(new ArrayList<>(fieldSchemas));
tblDesc.setPartCols(new ArrayList<>(partitionColumns));
} else if (viewDesc != null) {
viewDesc.setSchema(new ArrayList<>(fieldSchemas));
viewDesc.setPartCols(new ArrayList<>(partitionColumns));
if (viewDesc.isOrganized()) {
viewDesc.setSortCols(new ArrayList<>(sortColumns));
viewDesc.setDistributeCols(new ArrayList<>(distributeColumns));
}
}
boolean isDestTempFile = true;
if (!ctx.isMRTmpFileURI(destinationPath.toUri().toString()) && !ctx.isResultCacheDir(destinationPath)) {
// not a temp dir and not a result cache dir
idToTableNameMap.put(String.valueOf(destTableId), destinationPath.toUri().toString());
currentTableId = destTableId;
destTableId++;
isDestTempFile = false;
}
if (tblDesc == null) {
if (viewDesc != null) {
tableDescriptor = PlanUtils.getTableDesc(viewDesc, cols, colTypes);
} else if (qb.getIsQuery()) {
Class<? extends Deserializer> serdeClass = LazySimpleSerDe.class;
String fileFormat = conf.getResultFileFormat().toString();
if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()) {
serdeClass = ThriftJDBCBinarySerDe.class;
fileFormat = ResultFileFormat.SEQUENCEFILE.toString();
// Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
// write out formatted thrift objects to SequenceFile
conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
} else if (fileFormat.equals(PlanUtils.LLAP_OUTPUT_FORMAT_KEY)) {
// If this output format is Llap, check to see if Arrow is requested
boolean useArrow = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_OUTPUT_FORMAT_ARROW);
serdeClass = useArrow ? ArrowColumnarBatchSerDe.class : LazyBinarySerDe2.class;
}
tableDescriptor = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, serdeClass);
} else {
tableDescriptor = PlanUtils.getDefaultTableDesc(qb.getDirectoryDesc(), cols, colTypes);
}
} else {
tableDescriptor = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
}
// if available, set location in table desc properties
if (tblDesc != null && tblDesc.getLocation() != null && tableDescriptor != null && !tableDescriptor.getProperties().containsKey(hive_metastoreConstants.META_TABLE_LOCATION)) {
tableDescriptor.getProperties().setProperty(hive_metastoreConstants.META_TABLE_LOCATION, tblDesc.getLocation());
}
// We need a specific rowObjectInspector in this case
try {
specificRowObjectInspector = (StructObjectInspector) tableDescriptor.getDeserializer(conf).getObjectInspector();
} catch (Exception e) {
throw new SemanticException(e.getMessage(), e);
}
boolean isDfsDir = (destType == QBMetaData.DEST_DFS_FILE);
try {
destinationTable = tblDesc != null ? tblDesc.toTable(conf) : viewDesc != null ? viewDesc.toTable(conf) : null;
} catch (HiveException e) {
throw new SemanticException(e);
}
destTableIsFullAcid = AcidUtils.isFullAcidTable(destinationTable);
// Data organization (DISTRIBUTED, SORTED, CLUSTERED) for materialized view
if (viewDesc != null && viewDesc.isOrganized()) {
input = genMaterializedViewDataOrgPlan(sortColInfos, distributeColInfos, inputRR, input);
}
moveTaskId = getMoveTaskId();
if (isPartitioned) {
// Create a SELECT that may reorder the columns if needed
RowResolver rowResolver = new RowResolver();
List<ExprNodeDesc> columnExprs = new ArrayList<>();
List<String> colNames = new ArrayList<>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
for (int i = 0; i < fileSinkColInfos.size(); i++) {
ColumnInfo ci = fileSinkColInfos.get(i);
ExprNodeDesc columnExpr = new ExprNodeColumnDesc(ci);
String name = getColumnInternalName(i);
rowResolver.put("", name, new ColumnInfo(name, columnExpr.getTypeInfo(), "", false));
columnExprs.add(columnExpr);
colNames.add(name);
colExprMap.put(name, columnExpr);
}
input = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(columnExprs, colNames), new RowSchema(rowResolver.getColumnInfos()), input), rowResolver);
input.setColumnExprMap(colExprMap);
// If this is a partitioned CTAS or MV statement, we are going to create a LoadTableDesc
// object. Although the table does not exist in metastore, we will swap the CreateTableTask
// and MoveTask resulting from this LoadTable so in this specific case, first we create
// the metastore table, then we move and commit the partitions. At least for the time being,
// this order needs to be enforced because metastore expects a table to exist before we can
// add any partitions to it.
isNonNativeTable = tableDescriptor.isNonNative();
if (!isNonNativeTable || destinationTable.getStorageHandler().commitInMoveTask()) {
AcidUtils.Operation acidOp = AcidUtils.Operation.NOT_ACID;
if (destTableIsTransactional) {
acidOp = getAcidType(tableDescriptor.getOutputFileFormatClass(), dest, isMmTable);
checkAcidConstraints();
}
// isReplace = false in case concurrent operation is executed
ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, dpCtx, acidOp, false, writeId);
if (writeId != null) {
ltd.setStmtId(txnMgr.getCurrentStmtId());
}
ltd.setLoadFileType(LoadFileType.KEEP_EXISTING);
ltd.setInsertOverwrite(false);
loadTableWork.add(ltd);
} else {
// This is a non-native table.
// We need to set stats as inaccurate.
setStatsForNonNativeTable(tableDescriptor.getDbName(), tableDescriptor.getTableName());
ltd = new LoadTableDesc(queryTmpdir, tableDescriptor, dpCtx.getPartSpec());
ltd.setInsertOverwrite(false);
ltd.setLoadFileType(LoadFileType.KEEP_EXISTING);
}
ltd.setMoveTaskId(moveTaskId);
ltd.setMdTable(destinationTable);
WriteEntity output = generateTableWriteEntity(dest, destinationTable, dpCtx.getPartSpec(), ltd, dpCtx);
ctx.getLoadTableOutputMap().put(ltd, output);
} else {
// Create LFD even for MM CTAS - it's a no-op move, but it still seems to be used for stats.
LoadFileDesc loadFileDesc = new LoadFileDesc(tblDesc, viewDesc, queryTmpdir, destinationPath, isDfsDir, cols, colTypes, // there is a change here - prev version had 'transactional', one before 'acid'
destTableIsFullAcid ? Operation.INSERT : Operation.NOT_ACID, isMmCreate);
loadFileDesc.setMoveTaskId(moveTaskId);
loadFileWork.add(loadFileDesc);
try {
Path qualifiedPath = destinationPath.getFileSystem(conf).makeQualified(destinationPath);
if (!outputs.add(new WriteEntity(qualifiedPath, !isDfsDir, isDestTempFile))) {
throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES.getMsg(destinationPath.toUri().toString()));
}
} catch (IOException ex) {
throw new SemanticException("Error while getting the full qualified path for the given directory: " + ex.getMessage());
}
}
break;
}
default:
throw new SemanticException("Unknown destination type: " + destType);
}
inputRR = opParseCtx.get(input).getRowResolver();
List<ColumnInfo> vecCol = new ArrayList<ColumnInfo>();
if (updating(dest) || deleting(dest)) {
vecCol.add(new ColumnInfo(VirtualColumn.ROWID.getName(), VirtualColumn.ROWID.getTypeInfo(), "", true));
} else {
try {
// If we already have a specific inspector (view or directory as a target) use that
// Otherwise use the table deserializer to get the inspector
StructObjectInspector rowObjectInspector = specificRowObjectInspector != null ? specificRowObjectInspector : (StructObjectInspector) destinationTable.getDeserializer().getObjectInspector();
List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
for (StructField field : fields) {
vecCol.add(new ColumnInfo(field.getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(field.getFieldObjectInspector()), "", false));
}
} catch (Exception e) {
throw new SemanticException(e.getMessage(), e);
}
}
RowSchema fsRS = new RowSchema(vecCol);
// The output files of a FileSink can be merged if they are either not being written to a table
// or are being written to a table which is neither bucketed nor sorted.
boolean canBeMerged = (destinationTable == null || !((destinationTable.getNumBuckets() > 0) || (destinationTable.getSortCols() != null && destinationTable.getSortCols().size() > 0)));
// If this table is working with ACID semantics, turn off merging
canBeMerged &= !destTableIsFullAcid;
// Generate the partition columns from the parent input
if (destType == QBMetaData.DEST_TABLE || destType == QBMetaData.DEST_PARTITION) {
genPartnCols(dest, input, qb, tableDescriptor, destinationTable, rsCtx);
}
FileSinkDesc fileSinkDesc = createFileSinkDesc(dest, tableDescriptor, destinationPartition, // this was 1/4 acid
destinationPath, // this was 1/4 acid
currentTableId, // this was 1/4 acid
destTableIsFullAcid, // this was 1/4 acid
destTableIsTemporary, destTableIsMaterialization, queryTmpdir, rsCtx, dpCtx, lbCtx, fsRS, canBeMerged, destinationTable, writeId, isMmCreate, destType, qb, isDirectInsert, acidOperation, moveTaskId);
if (isMmCreate) {
// Add FSD so that the LoadTask compilation could fix up its path to avoid the move.
if (tableDesc != null) {
tableDesc.setWriter(fileSinkDesc);
} else {
createVwDesc.setWriter(fileSinkDesc);
}
}
if (fileSinkDesc.getInsertOverwrite()) {
if (ltd != null) {
ltd.setInsertOverwrite(true);
}
}
if (null != tableDescriptor && useBatchingSerializer(tableDescriptor.getSerdeClassName())) {
fileSinkDesc.setIsUsingBatchingSerDe(true);
} else {
fileSinkDesc.setIsUsingBatchingSerDe(false);
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(fileSinkDesc, fsRS, input), inputRR);
// Skip lineage only for a direct insert whose operation is not a plain INSERT, which can happen in case of a merge statement.
if (!isDirectInsert || acidOperation == AcidUtils.Operation.INSERT) {
handleLineage(ltd, output);
}
setWriteIdForSurrogateKeys(ltd, input);
LOG.debug("Created FileSink Plan for clause: {}dest_path: {} row schema: {}", dest, destinationPath, inputRR);
FileSinkOperator fso = (FileSinkOperator) output;
fso.getConf().setTable(destinationTable);
// Auto-gather column stats when enabled, when the destination is a suitable non-temporary table, and it is an insert overwrite or insert into table
if (conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) && destinationTable != null && (!destinationTable.isNonNative() || destinationTable.getStorageHandler().commitInMoveTask()) && !destTableIsTemporary && !destTableIsMaterialization && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) {
if (destType == QBMetaData.DEST_TABLE) {
genAutoColumnStatsGatheringPipeline(destinationTable, partSpec, input, qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName()), false);
} else if (destType == QBMetaData.DEST_PARTITION) {
genAutoColumnStatsGatheringPipeline(destinationTable, destinationPartition.getSpec(), input, qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName()), false);
} else if (destType == QBMetaData.DEST_LOCAL_FILE || destType == QBMetaData.DEST_DFS_FILE) {
// CTAS or CMV statement
genAutoColumnStatsGatheringPipeline(destinationTable, null, input, false, true);
}
}
return output;
}
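The only place SelectDesc appears in this long method is the partitioned CTAS/materialized-view branch above ("Create a SELECT that may reorder the columns if needed"), where the incoming columns are re-emitted under fresh internal names in the order expected by the file sink. A condensed, self-contained restatement of just that step is sketched below; ReorderSelectSketch is a hypothetical wrapper, and the real method additionally registers the operator and its RowResolver through putOpInsertMap.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;

final class ReorderSelectSketch {
  // Re-emits fileSinkColInfos under fresh internal names (_col0, _col1, ...) in the order
  // the file sink expects, and hangs the SELECT below the given input operator.
  static Operator<?> reorderForFileSink(List<ColumnInfo> fileSinkColInfos, Operator<?> input) {
    RowResolver rowResolver = new RowResolver();
    List<ExprNodeDesc> columnExprs = new ArrayList<ExprNodeDesc>();
    List<String> colNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < fileSinkColInfos.size(); i++) {
      ColumnInfo ci = fileSinkColInfos.get(i);
      ExprNodeDesc columnExpr = new ExprNodeColumnDesc(ci);
      String name = "_col" + i; // equivalent to getColumnInternalName(i) in the original code
      rowResolver.put("", name, new ColumnInfo(name, columnExpr.getTypeInfo(), "", false));
      columnExprs.add(columnExpr);
      colNames.add(name);
      colExprMap.put(name, columnExpr);
    }
    Operator<?> select = OperatorFactory.getAndMakeChild(
        new SelectDesc(columnExprs, colNames), new RowSchema(rowResolver.getColumnInfos()), input);
    select.setColumnExprMap(colExprMap);
    return select;
  }
}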
Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.
The class SemanticAnalyzer, method genReduceSinkPlan: creates a ReduceSink for the given partition/sort columns and a SELECT above it that restores the original column layout, re-emitting any constants that were pulled out of the sort keys.
@SuppressWarnings("nls")
private Operator genReduceSinkPlan(Operator<?> input, List<ExprNodeDesc> partitionCols, List<ExprNodeDesc> sortCols, String sortOrder, String nullOrder, int numReducers, AcidUtils.Operation acidOp, boolean pullConstants, boolean isCompaction) throws SemanticException {
RowResolver inputRR = opParseCtx.get(input).getRowResolver();
Operator dummy = Operator.createDummy();
dummy.setParentOperators(Arrays.asList(input));
List<ExprNodeDesc> newSortCols = new ArrayList<ExprNodeDesc>();
StringBuilder newSortOrder = new StringBuilder();
StringBuilder newNullOrder = new StringBuilder();
List<ExprNodeDesc> sortColsBack = new ArrayList<ExprNodeDesc>();
for (int i = 0; i < sortCols.size(); i++) {
ExprNodeDesc sortCol = sortCols.get(i);
// keep this sort column unless we are pulling constants and it is a constant
if (!pullConstants || !(sortCol instanceof ExprNodeConstantDesc)) {
newSortCols.add(sortCol);
newSortOrder.append(sortOrder.charAt(i));
newNullOrder.append(nullOrder.charAt(i));
sortColsBack.add(ExprNodeDescUtils.backtrack(sortCol, dummy, input));
}
}
// For the generation of the values expression just get the inputs
// signature and generate field expressions for those
RowResolver rsRR = new RowResolver();
List<String> outputColumns = new ArrayList<String>();
List<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
List<ExprNodeDesc> valueColsBack = new ArrayList<ExprNodeDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
List<ExprNodeDesc> constantCols = new ArrayList<ExprNodeDesc>();
List<ColumnInfo> columnInfos = inputRR.getColumnInfos();
int[] index = new int[columnInfos.size()];
for (int i = 0; i < index.length; i++) {
ColumnInfo colInfo = columnInfos.get(i);
String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
ExprNodeColumnDesc value = new ExprNodeColumnDesc(colInfo);
// backtrack can be null when the input is a script operator
ExprNodeDesc valueBack = ExprNodeDescUtils.backtrack(value, dummy, input);
if (pullConstants && valueBack instanceof ExprNodeConstantDesc) {
// ignore, it will be generated by SEL op
index[i] = Integer.MAX_VALUE;
constantCols.add(valueBack);
continue;
}
int kindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, sortColsBack);
if (kindex >= 0) {
index[i] = kindex;
ColumnInfo newColInfo = new ColumnInfo(colInfo);
newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
newColInfo.setTabAlias(nm[0]);
rsRR.put(nm[0], nm[1], newColInfo);
if (nm2 != null) {
rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
}
continue;
}
int vindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, valueColsBack);
if (vindex >= 0) {
index[i] = -vindex - 1;
continue;
}
index[i] = -valueCols.size() - 1;
String outputColName = getColumnInternalName(valueCols.size());
valueCols.add(value);
valueColsBack.add(valueBack);
ColumnInfo newColInfo = new ColumnInfo(colInfo);
newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
newColInfo.setTabAlias(nm[0]);
rsRR.put(nm[0], nm[1], newColInfo);
if (nm2 != null) {
rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
}
outputColumns.add(outputColName);
}
dummy.setParentOperators(null);
ReduceSinkDesc rsdesc = PlanUtils.getReduceSinkDesc(newSortCols, valueCols, outputColumns, false, -1, partitionCols, newSortOrder.toString(), newNullOrder.toString(), defaultNullOrder, numReducers, acidOp, isCompaction);
Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(rsdesc, new RowSchema(rsRR.getColumnInfos()), input), rsRR);
List<String> keyColNames = rsdesc.getOutputKeyColumnNames();
for (int i = 0; i < keyColNames.size(); i++) {
colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), newSortCols.get(i));
}
List<String> valueColNames = rsdesc.getOutputValueColumnNames();
for (int i = 0; i < valueColNames.size(); i++) {
colExprMap.put(Utilities.ReduceField.VALUE + "." + valueColNames.get(i), valueCols.get(i));
}
interim.setColumnExprMap(colExprMap);
RowResolver selectRR = new RowResolver();
List<ExprNodeDesc> selCols = new ArrayList<ExprNodeDesc>();
List<String> selOutputCols = new ArrayList<String>();
Map<String, ExprNodeDesc> selColExprMap = new HashMap<String, ExprNodeDesc>();
Iterator<ExprNodeDesc> constants = constantCols.iterator();
for (int i = 0; i < index.length; i++) {
ColumnInfo prev = columnInfos.get(i);
String[] nm = inputRR.reverseLookup(prev.getInternalName());
String[] nm2 = inputRR.getAlternateMappings(prev.getInternalName());
ColumnInfo info = new ColumnInfo(prev);
ExprNodeDesc desc;
if (index[i] == Integer.MAX_VALUE) {
desc = constants.next();
} else {
String field;
if (index[i] >= 0) {
field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]);
} else {
field = Utilities.ReduceField.VALUE + "." + valueColNames.get(-index[i] - 1);
}
desc = new ExprNodeColumnDesc(info.getType(), field, info.getTabAlias(), info.getIsVirtualCol());
}
selCols.add(desc);
String internalName = getColumnInternalName(i);
info.setInternalName(internalName);
selectRR.put(nm[0], nm[1], info);
if (nm2 != null) {
selectRR.addMappingOnly(nm2[0], nm2[1], info);
}
selOutputCols.add(internalName);
selColExprMap.put(internalName, desc);
}
SelectDesc select = new SelectDesc(selCols, selOutputCols);
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(select, new RowSchema(selectRR.getColumnInfos()), interim), selectRR);
output.setColumnExprMap(selColExprMap);
return output;
}
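A note on the final SELECT: the ReduceSink renames every forwarded column into the KEY/VALUE namespaces (sort keys become KEY.reducesinkkey&lt;N&gt;, the remaining columns VALUE.&lt;internal name&gt;), so the SelectDesc built at the end re-projects those fields under fresh internal names to restore the original row layout; when pullConstants is set, constant sort columns are not shipped through the ReduceSink at all and are instead re-emitted directly by this SELECT.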
Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache.
The class SemanticAnalyzer, method insertSelectForSemijoin.
/**
 * Construct a selection operator for a semijoin that filters out all fields
 * other than the group by keys.
 *
 * @param fields
 *          list of fields that need to be output
 * @param input
 *          input operator
 * @return the selection operator.
 * @throws SemanticException
 */
private Operator insertSelectForSemijoin(List<ASTNode> fields, Operator<?> input) throws SemanticException {
  RowResolver inputRR = opParseCtx.get(input).getRowResolver();
  List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
  List<String> outputColumnNames = new ArrayList<String>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  RowResolver outputRR = new RowResolver();
  // construct the list of columns that need to be projected
  for (int i = 0; i < fields.size(); ++i) {
    ASTNode field = fields.get(i);
    String[] nm;
    String[] nm2;
    ExprNodeDesc expr = genExprNodeDesc(field, inputRR);
    if (expr instanceof ExprNodeColumnDesc) {
      // In most of the cases, this is a column reference
      ExprNodeColumnDesc columnExpr = (ExprNodeColumnDesc) expr;
      nm = inputRR.reverseLookup(columnExpr.getColumn());
      nm2 = inputRR.getAlternateMappings(columnExpr.getColumn());
    } else if (expr instanceof ExprNodeConstantDesc) {
      // However, it can be a constant too. In that case, we need to track
      // the column that it originated from in the input operator so we can
      // propagate the aliases.
      ExprNodeConstantDesc constantExpr = (ExprNodeConstantDesc) expr;
      String inputCol = constantExpr.getFoldedFromCol();
      nm = inputRR.reverseLookup(inputCol);
      nm2 = inputRR.getAlternateMappings(inputCol);
    } else {
      // Not a column or a folded constant: bail out and simply return the input
      // of the left semijoin
      return input;
    }
    String colName = getColumnInternalName(i);
    outputColumnNames.add(colName);
    ColumnInfo colInfo = new ColumnInfo(colName, expr.getTypeInfo(), "", false);
    outputRR.put(nm[0], nm[1], colInfo);
    if (nm2 != null) {
      outputRR.addMappingOnly(nm2[0], nm2[1], colInfo);
    }
    colList.add(expr);
    colExprMap.put(colName, expr);
  }
  // create selection operator
  Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(colList, outputColumnNames, false), new RowSchema(outputRR.getColumnInfos()), input), outputRR);
  output.setColumnExprMap(colExprMap);
  return output;
}
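A note on the constructor used here: unlike the other snippets on this page, which call the two-argument SelectDesc constructor, this one passes an explicit third boolean (false); judging by the SelectDesc class that flag marks a select-star projection, so false keeps this an ordinary column projection. As in the other examples, the column list and output-name list are positional pairs, and the same pairs are recorded through setColumnExprMap for later use by the planner (for example, column backtracking).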