use of org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer in project hive by apache.
the class DDLTask method createView.
/**
* Create a new view.
*
* @param db
* The database in question.
* @param crtView
* This is the view we're creating.
* @return Returns 0 when execution succeeds and above 0 if it fails.
* @throws HiveException
* Throws this exception if an unexpected error occurs.
*/
private int createView(Hive db, CreateViewDesc crtView) throws HiveException {
Table oldview = db.getTable(crtView.getViewName(), false);
if (oldview != null) {
// Check whether we are replicating
if (crtView.getReplicationSpec().isInReplicationScope()) {
// if this is a replication spec, then replace-mode semantics might apply.
if (crtView.getReplicationSpec().allowEventReplacementInto(oldview.getParameters())) {
// we replace existing view.
crtView.setReplace(true);
} else {
LOG.debug("DDLTask: Create View is skipped as view {} is newer than update", // no replacement, the existing table state is newer than our update.
crtView.getViewName());
return 0;
}
}
if (!crtView.isReplace()) {
// View already exists, thus we should be replacing
throw new HiveException(ErrorMsg.TABLE_ALREADY_EXISTS.getMsg(crtView.getViewName()));
}
// It should not be a materialized view
assert !crtView.isMaterialized();
// replace existing view
// remove the existing partition columns from the field schema
oldview.setViewOriginalText(crtView.getViewOriginalText());
oldview.setViewExpandedText(crtView.getViewExpandedText());
oldview.setFields(crtView.getSchema());
if (crtView.getComment() != null) {
oldview.setProperty("comment", crtView.getComment());
}
if (crtView.getTblProps() != null) {
oldview.getTTable().getParameters().putAll(crtView.getTblProps());
}
oldview.setPartCols(crtView.getPartCols());
if (crtView.getInputFormat() != null) {
oldview.setInputFormatClass(crtView.getInputFormat());
}
if (crtView.getOutputFormat() != null) {
oldview.setOutputFormatClass(crtView.getOutputFormat());
}
oldview.checkValidity(null);
db.alterTable(crtView.getViewName(), oldview, null);
addIfAbsentByName(new WriteEntity(oldview, WriteEntity.WriteType.DDL_NO_LOCK));
} else {
// We create new view
Table tbl = crtView.toTable(conf);
// We set the signature for the view if it is a materialized view
if (tbl.isMaterializedView()) {
CreationMetadata cm = new CreationMetadata(tbl.getDbName(), tbl.getTableName(), ImmutableSet.copyOf(crtView.getTablesUsed()));
cm.setValidTxnList(conf.get(ValidTxnList.VALID_TXNS_KEY));
tbl.getTTable().setCreationMetadata(cm);
}
db.createTable(tbl, crtView.getIfNotExists());
addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
// set lineage info
DataContainer dc = new DataContainer(tbl.getTTable());
queryState.getLineageState().setLineage(new Path(crtView.getViewName()), dc, tbl.getCols());
}
return 0;
}
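The tail of the create branch above is the core DataContainer pattern for unpartitioned objects: wrap the Thrift-level metastore table and register it, together with the column list, against the session's lineage state. Below is a minimal sketch of that pattern pulled out into a hypothetical helper; the class and method names are illustrative, not part of DDLTask, and the imports assume the Hive 3.x package layout.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.session.LineageState;

// Illustrative helper only: mirrors the lineage registration done at the end of createView.
public final class TableLineageSketch {
  public static void recordTableLineage(LineageState lineageState, Table tbl, String name) {
    // DataContainer wraps the Thrift metastore table, hence getTTable() rather than the ql-level Table.
    DataContainer dc = new DataContainer(tbl.getTTable());
    // DDLTask keys the lineage entry by a Path built from the view name.
    lineageState.setLineage(new Path(name), dc, tbl.getCols());
  }
}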
use of org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer in project hive by apache.
the class DDLTask method createTable.
/**
* Create a new table.
*
* @param db
* The database in question.
* @param crtTbl
* This is the table we're creating.
* @return Returns 0 when execution succeeds and above 0 if it fails.
* @throws HiveException
* Throws this exception if an unexpected error occurs.
*/
private int createTable(Hive db, CreateTableDesc crtTbl) throws HiveException {
// create the table
Table tbl = crtTbl.toTable(conf);
List<SQLPrimaryKey> primaryKeys = crtTbl.getPrimaryKeys();
List<SQLForeignKey> foreignKeys = crtTbl.getForeignKeys();
List<SQLUniqueConstraint> uniqueConstraints = crtTbl.getUniqueConstraints();
List<SQLNotNullConstraint> notNullConstraints = crtTbl.getNotNullConstraints();
List<SQLDefaultConstraint> defaultConstraints = crtTbl.getDefaultConstraints();
List<SQLCheckConstraint> checkConstraints = crtTbl.getCheckConstraints();
LOG.debug("creating table {} on {}", tbl.getFullyQualifiedName(), tbl.getDataLocation());
if (crtTbl.getReplicationSpec().isInReplicationScope() && (!crtTbl.getReplaceMode())) {
// if this is a replication spec, then replace-mode semantics might apply.
// if we're already asking for a table replacement, then we can skip this check.
// however, otherwise, if in replication scope, and we've not been explicitly asked
// to replace, we should check if the object we're looking at exists, and if so,
// trigger replace-mode semantics.
Table existingTable = db.getTable(tbl.getDbName(), tbl.getTableName(), false);
if (existingTable != null) {
if (crtTbl.getReplicationSpec().allowEventReplacementInto(existingTable.getParameters())) {
// we replace existing table.
crtTbl.setReplaceMode(true);
} else {
LOG.debug("DDLTask: Create Table is skipped as table {} is newer than update", crtTbl.getTableName());
// no replacement, the existing table state is newer than our update.
return 0;
}
}
}
// create the table
if (crtTbl.getReplaceMode()) {
// replace-mode creates are really alters using CreateTableDesc.
db.alterTable(tbl, null);
} else {
if ((foreignKeys != null && foreignKeys.size() > 0) || (primaryKeys != null && primaryKeys.size() > 0) || (uniqueConstraints != null && uniqueConstraints.size() > 0) || (notNullConstraints != null && notNullConstraints.size() > 0) || (checkConstraints != null && checkConstraints.size() > 0) || defaultConstraints != null && defaultConstraints.size() > 0) {
db.createTable(tbl, crtTbl.getIfNotExists(), primaryKeys, foreignKeys, uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints);
} else {
db.createTable(tbl, crtTbl.getIfNotExists());
}
Long mmWriteId = crtTbl.getInitialMmWriteId();
if (crtTbl.isCTAS() || mmWriteId != null) {
Table createdTable = db.getTable(tbl.getDbName(), tbl.getTableName());
if (crtTbl.isCTAS()) {
DataContainer dc = new DataContainer(createdTable.getTTable());
queryState.getLineageState().setLineage(createdTable.getPath(), dc, createdTable.getCols());
}
}
}
addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
return 0;
}
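The branch that picks between the two db.createTable overloads turns on the long non-null/non-empty check over the six constraint lists. Below is a hedged sketch of how that predicate could be factored out; hasAnyConstraint is an illustrative helper, not an existing Hive method.
import java.util.List;

// Illustrative helper only: true if any of the given constraint lists is non-null and non-empty.
public final class ConstraintCheckSketch {
  public static boolean hasAnyConstraint(List<?>... constraintLists) {
    for (List<?> constraints : constraintLists) {
      if (constraints != null && !constraints.isEmpty()) {
        return true;
      }
    }
    return false;
  }
}
With such a helper, the condition in the snippet reduces to hasAnyConstraint(primaryKeys, foreignKeys, uniqueConstraints, notNullConstraints, defaultConstraints, checkConstraints).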
use of org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer in project hive by apache.
the class MoveTask method handleStaticParts.
private DataContainer handleStaticParts(Hive db, Table table, LoadTableDesc tbd, TaskInformation ti) throws HiveException, IOException, InvalidOperationException {
List<String> partVals = MetaStoreUtils.getPvals(table.getPartCols(), tbd.getPartitionSpec());
db.validatePartitionNameCharacters(partVals);
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("loadPartition called from " + tbd.getSourcePath() + " into " + tbd.getTable().getTableName());
}
db.loadPartition(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getLoadFileType(), tbd.getInheritTableSpecs(), isSkewedStoredAsDirs(tbd), work.isSrcLocal(), work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID && !tbd.isMmTable(), hasFollowingStatsTask(), tbd.getWriteId(), tbd.getStmtId());
Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
// See the comment inside updatePartitionBucketSortColumns.
if (!tbd.isMmTable() && (ti.bucketCols != null || ti.sortCols != null)) {
updatePartitionBucketSortColumns(db, table, partn, ti.bucketCols, ti.numBuckets, ti.sortCols);
}
DataContainer dc = new DataContainer(table.getTTable(), partn.getTPartition());
// add this partition to post-execution hook
if (work.getOutputs() != null) {
DDLTask.addIfAbsentByName(new WriteEntity(partn, getWriteType(tbd, work.getLoadTableWork().getWriteType())), work.getOutputs());
}
return dc;
}
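handleStaticParts uses the two-argument DataContainer constructor, which scopes the container to a single partition; the caller in execute then registers it against the load's source path. Below is a minimal sketch of that partition-level pattern; the helper name and explicit parameters are illustrative, and the imports assume the Hive 3.x package layout.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.session.LineageState;

// Illustrative helper only: partition-scoped lineage, combining what handleStaticParts
// builds with the registration its caller performs.
public final class PartitionLineageSketch {
  public static DataContainer recordPartitionLineage(LineageState lineageState, Path sourcePath,
      Table table, Partition partn) {
    // Both the Thrift table and the Thrift partition go into the container.
    DataContainer dc = new DataContainer(table.getTTable(), partn.getTPartition());
    // Keyed by the load's source path, as MoveTask.execute does for the container returned here.
    lineageState.setLineage(sourcePath, dc, table.getCols());
    return dc;
  }
}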
use of org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer in project hive by apache.
the class MoveTask method execute.
@Override
public int execute(DriverContext driverContext) {
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("Executing MoveWork " + System.identityHashCode(work) + " with " + work.getLoadFileWork() + "; " + work.getLoadTableWork() + "; " + work.getLoadMultiFilesWork());
}
try {
if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) {
return 0;
}
Hive db = getHive();
// Do any hive related operations like moving tables and files
// to appropriate locations
LoadFileDesc lfd = work.getLoadFileWork();
if (lfd != null) {
Path targetPath = lfd.getTargetDir();
Path sourcePath = lfd.getSourcePath();
if (targetPath.equals(sourcePath)) {
Utilities.FILE_OP_LOGGER.debug("MoveTask not moving " + sourcePath);
} else {
Utilities.FILE_OP_LOGGER.debug("MoveTask moving " + sourcePath + " to " + targetPath);
if (lfd.getWriteType() == AcidUtils.Operation.INSERT) {
// 'sourcePath' is the result of the 'select ...' part of a CTAS statement
assert lfd.getIsDfsDir();
FileSystem srcFs = sourcePath.getFileSystem(conf);
FileStatus[] srcs = srcFs.globStatus(sourcePath);
if (srcs != null) {
List<Path> newFiles = new ArrayList<>();
Hive.moveAcidFiles(srcFs, srcs, targetPath, newFiles);
} else {
LOG.debug("No files found to move from " + sourcePath + " to " + targetPath);
}
} else {
moveFile(sourcePath, targetPath, lfd.getIsDfsDir());
}
}
}
// Multi-file load is for dynamic partitions when some partitions do not
// need to merge and they can simply be moved to the target directory.
// This is also used for MM table conversion.
LoadMultiFilesDesc lmfd = work.getLoadMultiFilesWork();
if (lmfd != null) {
boolean isDfsDir = lmfd.getIsDfsDir();
List<String> targetPrefixes = lmfd.getTargetPrefixes();
for (int i = 0; i < lmfd.getSourceDirs().size(); ++i) {
Path srcPath = lmfd.getSourceDirs().get(i);
Path destPath = lmfd.getTargetDirs().get(i);
String filePrefix = targetPrefixes == null ? null : targetPrefixes.get(i);
FileSystem destFs = destPath.getFileSystem(conf);
if (filePrefix == null) {
if (!destFs.exists(destPath.getParent())) {
destFs.mkdirs(destPath.getParent());
}
Utilities.FILE_OP_LOGGER.debug("MoveTask moving (multi-file) " + srcPath + " to " + destPath);
moveFile(srcPath, destPath, isDfsDir);
} else {
if (!destFs.exists(destPath)) {
destFs.mkdirs(destPath);
}
FileSystem srcFs = srcPath.getFileSystem(conf);
FileStatus[] children = srcFs.listStatus(srcPath);
if (children != null) {
for (FileStatus child : children) {
Path childSrc = child.getPath();
Path childDest = new Path(destPath, filePrefix + childSrc.getName());
Utilities.FILE_OP_LOGGER.debug("MoveTask moving (multi-file) " + childSrc + " to " + childDest);
moveFile(childSrc, childDest, isDfsDir);
}
} else {
Utilities.FILE_OP_LOGGER.debug("MoveTask skipping empty directory (multi-file) " + srcPath);
}
if (!srcFs.delete(srcPath, false)) {
throw new IOException("Couldn't delete " + srcPath + " after moving all the files");
}
}
}
}
// Next we do this for tables and partitions
LoadTableDesc tbd = work.getLoadTableWork();
if (tbd != null) {
logMessage(tbd);
Table table = db.getTable(tbd.getTable().getTableName());
checkFileFormats(db, tbd, table);
// it seems that LoadTableDesc has Operation.INSERT only for CTAS...
boolean isFullAcidOp = work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID && !tbd.isMmTable();
// Create a data container
DataContainer dc = null;
if (tbd.getPartitionSpec().size() == 0) {
dc = new DataContainer(table.getTTable());
if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
Utilities.FILE_OP_LOGGER.trace("loadTable called from " + tbd.getSourcePath() + " into " + tbd.getTable().getTableName());
}
db.loadTable(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getLoadFileType(), work.isSrcLocal(), isSkewedStoredAsDirs(tbd), isFullAcidOp, hasFollowingStatsTask(), tbd.getWriteId(), tbd.getStmtId());
if (work.getOutputs() != null) {
DDLTask.addIfAbsentByName(new WriteEntity(table, getWriteType(tbd, work.getLoadTableWork().getWriteType())), work.getOutputs());
}
} else {
LOG.info("Partition is: {}", tbd.getPartitionSpec());
// Check if the bucketing and/or sorting columns were inferred
TaskInformation ti = new TaskInformation(this, tbd.getSourcePath().toUri().toString());
inferTaskInformation(ti);
// deal with dynamic partitions
DynamicPartitionCtx dpCtx = tbd.getDPCtx();
if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
// dynamic partitions
dc = handleDynParts(db, table, tbd, ti, dpCtx);
} else {
// static partitions
dc = handleStaticParts(db, table, tbd, ti);
}
}
if (dc != null) {
// If we are doing an update or a delete the number of columns in the table will not
// match the number of columns in the file sink. For update there will be one too many
// (because of the ROW__ID), and in the case of the delete there will be just the
// ROW__ID, which we don't need to worry about from a lineage perspective.
List<FieldSchema> tableCols = null;
switch(work.getLoadTableWork().getWriteType()) {
case DELETE:
case UPDATE:
// Pass an empty list as no columns will be written to the file.
// TODO I should be able to make this work for update
tableCols = new ArrayList<>();
break;
default:
tableCols = table.getCols();
break;
}
queryState.getLineageState().setLineage(tbd.getSourcePath(), dc, tableCols);
}
releaseLocks(tbd);
}
return 0;
} catch (HiveException he) {
int errorCode = 1;
if (he.getCanonicalErrorMsg() != ErrorMsg.GENERIC_ERROR) {
errorCode = he.getCanonicalErrorMsg().getErrorCode();
if (he.getCanonicalErrorMsg() == ErrorMsg.UNRESOLVED_RT_EXCEPTION) {
console.printError("Failed with exception " + he.getMessage(), "\n" + StringUtils.stringifyException(he));
} else {
console.printError("Failed with exception " + he.getMessage() + "\nRemote Exception: " + he.getRemoteErrorMsg());
console.printInfo("\n", StringUtils.stringifyException(he), false);
}
}
setException(he);
return errorCode;
} catch (Exception e) {
console.printError("Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e));
setException(e);
return (1);
}
}
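The switch near the end of execute decides which column list to attach to the lineage entry: UPDATE and DELETE file sinks carry only ROW__ID, so an empty list is passed instead of the table schema. Below is a hedged sketch of that selection as a standalone helper; the class and method names are illustrative.
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.metadata.Table;

// Illustrative helper only: mirrors the write-type switch in MoveTask.execute above.
public final class LineageColumnsSketch {
  public static List<FieldSchema> lineageColumns(AcidUtils.Operation writeType, Table table) {
    switch (writeType) {
      case DELETE:
      case UPDATE:
        // Only ROW__ID reaches the file sink for these operations, so no table columns are reported.
        return new ArrayList<>();
      default:
        return table.getCols();
    }
  }
}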
use of org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer in project hive by apache.
the class MoveTask method handleDynParts.
private DataContainer handleDynParts(Hive db, Table table, LoadTableDesc tbd, TaskInformation ti, DynamicPartitionCtx dpCtx) throws HiveException, IOException, InvalidOperationException {
DataContainer dc;
List<LinkedHashMap<String, String>> dps = Utilities.getFullDPSpecs(conf, dpCtx);
console.printInfo(System.getProperty("line.separator"));
long startTime = System.currentTimeMillis();
// load the list of DP partitions and return the list of partition specs
// TODO: In a follow-up to HIVE-1361, we should refactor loadDynamicPartitions
// to use Utilities.getFullDPSpecs() to get the list of full partSpecs.
// After that check the number of DPs created to not exceed the limit and
// iterate over it and call loadPartition() here.
// The reason we don't do this inside HIVE-1361 is that the latter is large and we
// want to isolate any potential issue it may introduce.
Map<Map<String, String>, Partition> dp = db.loadDynamicPartitions(tbd.getSourcePath(), tbd.getTable().getTableName(), tbd.getPartitionSpec(), tbd.getLoadFileType(), dpCtx.getNumDPCols(), (tbd.getLbCtx() == null) ? 0 : tbd.getLbCtx().calculateListBucketingLevel(), work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID && !tbd.isMmTable(), work.getLoadTableWork().getWriteId(), tbd.getStmtId(), hasFollowingStatsTask(), work.getLoadTableWork().getWriteType(), tbd.isInsertOverwrite());
// publish DP columns to its subscribers
if (dps != null && dps.size() > 0) {
pushFeed(FeedType.DYNAMIC_PARTITIONS, dp.values());
}
String loadTime = "\t Time taken to load dynamic partitions: " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds";
console.printInfo(loadTime);
LOG.info(loadTime);
if (dp.size() == 0 && conf.getBoolVar(HiveConf.ConfVars.HIVE_ERROR_ON_EMPTY_PARTITION)) {
throw new HiveException("This query creates no partitions." + " To turn off this error, set hive.error.on.empty.partition=false.");
}
startTime = System.currentTimeMillis();
// for each loaded partition, wrap it in a WriteEntity for the post-exec hooks
for (Map.Entry<Map<String, String>, Partition> entry : dp.entrySet()) {
Partition partn = entry.getValue();
// See the comment inside updatePartitionBucketSortColumns.
if (!tbd.isMmTable() && (ti.bucketCols != null || ti.sortCols != null)) {
updatePartitionBucketSortColumns(db, table, partn, ti.bucketCols, ti.numBuckets, ti.sortCols);
}
WriteEntity enty = new WriteEntity(partn, getWriteType(tbd, work.getLoadTableWork().getWriteType()));
if (work.getOutputs() != null) {
DDLTask.addIfAbsentByName(enty, work.getOutputs());
}
// For dynamic partitions the WriteEntity is created at execution time, so the
// queryPlan's outputs are updated here as well.
if (queryPlan.getOutputs() == null) {
queryPlan.setOutputs(new LinkedHashSet<WriteEntity>());
}
queryPlan.getOutputs().add(enty);
// update columnar lineage for each partition
dc = new DataContainer(table.getTTable(), partn.getTPartition());
// Don't set lineage on delete as we don't have all the columns
if (work.getLoadTableWork().getWriteType() != AcidUtils.Operation.DELETE && work.getLoadTableWork().getWriteType() != AcidUtils.Operation.UPDATE) {
queryState.getLineageState().setLineage(tbd.getSourcePath(), dc, table.getCols());
}
LOG.info("Loading partition " + entry.getKey());
}
console.printInfo("\t Time taken for adding to write entity : " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
// reset data container to prevent it being added again.
dc = null;
return dc;
}