use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.
the class ImportSemanticAnalyzer method createReplImportTasks.
/**
* Create tasks for repl import
*/
private static void createReplImportTasks(ImportTableDesc tblDesc, List<AddPartitionDesc> partitionDescs, ReplicationSpec replicationSpec, boolean waitOnPrecursor, Table table, URI fromURI, FileSystem fs, Warehouse wh, EximUtil.SemanticAnalyzerWrapperContext x, Long writeId, int stmtId, boolean isSourceMm, UpdatedMetaDataTracker updatedMetadata) throws HiveException, URISyntaxException, IOException, MetaException {
Task<?> dr = null;
WriteEntity.WriteType lockType = WriteEntity.WriteType.DDL_NO_LOCK;
// Normally, on import, trying to create a table or a partition in a db that does not yet exist
// is an error condition. However, in the case of a REPL LOAD, it is possible that we are trying
// to create tasks to create a table inside a db that as-of-now does not exist, but there is
// a precursor Task waiting that will create it before this is encountered. Thus, we instantiate
// defaults and do not error out in that case.
Database parentDb = x.getHive().getDatabase(tblDesc.getDatabaseName());
if (parentDb == null) {
if (!waitOnPrecursor) {
throw new SemanticException(ErrorMsg.DATABASE_NOT_EXISTS.getMsg(tblDesc.getDatabaseName()));
}
}
if (table != null) {
if (!replicationSpec.allowReplacementInto(table.getParameters())) {
// If the target table exists and, based on repl.last.id, is newer than or the same as the current update, then just noop it.
x.getLOG().info("Table {}.{} is not replaced as it is newer than the update", tblDesc.getDatabaseName(), tblDesc.getTableName());
return;
}
} else {
// If table doesn't exist, allow creating a new one only if the database state is older than the update.
if ((parentDb != null) && (!replicationSpec.allowReplacementInto(parentDb.getParameters()))) {
// If the target database is newer than or the same as the current update based on repl.last.id, then just noop it.
x.getLOG().info("Table {}.{} is not created as the database is newer than the update", tblDesc.getDatabaseName(), tblDesc.getTableName());
return;
}
}
if (updatedMetadata != null) {
updatedMetadata.set(replicationSpec.getReplicationState(), tblDesc.getDatabaseName(), tblDesc.getTableName(), null);
}
if (tblDesc.getLocation() == null) {
if (!waitOnPrecursor) {
tblDesc.setLocation(wh.getDefaultTablePath(parentDb, tblDesc.getTableName()).toString());
} else {
tblDesc.setLocation(wh.getDnsPath(new Path(wh.getDefaultDatabasePath(tblDesc.getDatabaseName()), org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.encodeTableName(tblDesc.getTableName().toLowerCase()))).toString());
}
}
/* Note: In the following section, Metadata-only import handling logic is
interleaved with regular repl-import logic. The rule of thumb being
followed here is that MD-only imports are essentially ALTERs. They do
not load data, and should not be "creating" any metadata - they should
be replacing instead. The only place it makes sense for an MD-only import
to create is in the case of a table that's been dropped and recreated,
or in the case of an unpartitioned table. In all other cases, it should
behave like a noop or a pure MD alter.
*/
if (table == null) {
if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
lockType = WriteEntity.WriteType.DDL_SHARED;
}
Task t = createTableTask(tblDesc, x);
table = new Table(tblDesc.getDatabaseName(), tblDesc.getTableName());
if (!replicationSpec.isMetadataOnly()) {
if (isPartitioned(tblDesc)) {
Task<?> ict = createImportCommitTask(tblDesc.getDatabaseName(), tblDesc.getTableName(), writeId, stmtId, AcidUtils.isInsertOnlyTable(tblDesc.getTblProps()));
for (AddPartitionDesc addPartitionDesc : partitionDescs) {
addPartitionDesc.setReplicationSpec(replicationSpec);
t.addDependentTask(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId, isSourceMm, ict));
if (updatedMetadata != null) {
updatedMetadata.addPartition(addPartitionDesc.getPartition(0).getPartSpec());
}
}
} else {
x.getLOG().debug("adding dependent CopyWork/MoveWork for table");
t.addDependentTask(loadTable(fromURI, table, true, new Path(tblDesc.getLocation()), replicationSpec, x, writeId, stmtId, isSourceMm));
}
}
// Simply create
x.getTasks().add(t);
} else {
// Table exists and is okay to replicate into; we are not dropping and re-creating it.
if (table.isPartitioned()) {
x.getLOG().debug("table partitioned");
for (AddPartitionDesc addPartitionDesc : partitionDescs) {
addPartitionDesc.setReplicationSpec(replicationSpec);
Map<String, String> partSpec = addPartitionDesc.getPartition(0).getPartSpec();
org.apache.hadoop.hive.ql.metadata.Partition ptn = null;
Task<?> ict = replicationSpec.isMetadataOnly() ? null : createImportCommitTask(tblDesc.getDatabaseName(), tblDesc.getTableName(), writeId, stmtId, AcidUtils.isInsertOnlyTable(tblDesc.getTblProps()));
if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) {
if (!replicationSpec.isMetadataOnly()) {
x.getTasks().add(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId, isSourceMm, ict));
if (updatedMetadata != null) {
updatedMetadata.addPartition(addPartitionDesc.getPartition(0).getPartSpec());
}
} else {
x.getTasks().add(alterSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, null, x));
if (updatedMetadata != null) {
updatedMetadata.addPartition(addPartitionDesc.getPartition(0).getPartSpec());
}
}
} else {
// Only replace if the destination partition's repl.last.id is older than the replacement's.
if (replicationSpec.allowReplacementInto(ptn.getParameters())) {
if (!replicationSpec.isMetadataOnly()) {
x.getTasks().add(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId, isSourceMm, ict));
} else {
x.getTasks().add(alterSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, ptn, x));
}
if (updatedMetadata != null) {
updatedMetadata.addPartition(addPartitionDesc.getPartition(0).getPartSpec());
}
if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
lockType = WriteEntity.WriteType.DDL_SHARED;
}
}
}
}
if (replicationSpec.isMetadataOnly() && partitionDescs.isEmpty()) {
// MD-ONLY table alter
x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec));
if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
lockType = WriteEntity.WriteType.DDL_SHARED;
}
}
} else {
x.getLOG().debug("table non-partitioned");
if (!replicationSpec.isMetadataOnly()) {
// repl-imports are replace-into unless the event is insert-into
loadTable(fromURI, table, replicationSpec.isReplace(), new Path(fromURI), replicationSpec, x, writeId, stmtId, isSourceMm);
} else {
x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec));
}
if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
lockType = WriteEntity.WriteType.DDL_SHARED;
}
}
}
x.getOutputs().add(new WriteEntity(table, lockType));
}
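In isolation, the pattern that closes this method is: start with DDL_NO_LOCK, escalate to DDL_SHARED once any create/alter task has been queued, and register the target table as a WriteEntity on the analyzer outputs. A minimal sketch of that pattern, using a plain Set in place of the SemanticAnalyzerWrapperContext outputs (the class and method names below are illustrative, not part of Hive):

import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Table;

public class ReplImportLockTypeSketch {
  // Escalate from DDL_NO_LOCK to DDL_SHARED when the import creates or alters
  // metadata, then register the target table as a write entity.
  static void registerOutput(Table table, boolean touchesMetadata, Set<WriteEntity> outputs) {
    WriteEntity.WriteType lockType = WriteEntity.WriteType.DDL_NO_LOCK;
    if (touchesMetadata) {
      lockType = WriteEntity.WriteType.DDL_SHARED;
    }
    outputs.add(new WriteEntity(table, lockType));
  }

  public static void main(String[] args) throws Exception {
    Set<WriteEntity> outputs = new HashSet<>();
    registerOutput(new Table("default", "repl_target"), true, outputs);
    System.out.println(outputs.size()); // 1
  }
}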
use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.
the class ImportSemanticAnalyzer method createRegularImportTasks.
/**
* Create tasks for regular import, no repl complexity
* @param tblDesc
* @param partitionDescs
* @param isPartSpecSet
* @param replicationSpec
* @param table
* @param fromURI
* @param fs
* @param wh
*/
private static void createRegularImportTasks(ImportTableDesc tblDesc, List<AddPartitionDesc> partitionDescs, boolean isPartSpecSet, ReplicationSpec replicationSpec, Table table, URI fromURI, FileSystem fs, Warehouse wh, EximUtil.SemanticAnalyzerWrapperContext x, Long writeId, int stmtId, boolean isSourceMm) throws HiveException, URISyntaxException, IOException, MetaException {
if (table != null) {
if (table.isPartitioned()) {
x.getLOG().debug("table partitioned");
Task<?> ict = createImportCommitTask(table.getDbName(), table.getTableName(), writeId, stmtId, AcidUtils.isInsertOnlyTable(table.getParameters()));
for (AddPartitionDesc addPartitionDesc : partitionDescs) {
Map<String, String> partSpec = addPartitionDesc.getPartition(0).getPartSpec();
org.apache.hadoop.hive.ql.metadata.Partition ptn = null;
if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) {
x.getTasks().add(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId, isSourceMm, ict));
} else {
throw new SemanticException(ErrorMsg.PARTITION_EXISTS.getMsg(partSpecToString(partSpec)));
}
}
} else {
x.getLOG().debug("table non-partitioned");
// ensure the destination is empty; this check is done only for regular import
Path tgtPath = new Path(table.getDataLocation().toString());
FileSystem tgtFs = FileSystem.get(tgtPath.toUri(), x.getConf());
checkTargetLocationEmpty(tgtFs, tgtPath, replicationSpec, x.getLOG());
loadTable(fromURI, table, false, tgtPath, replicationSpec, x, writeId, stmtId, isSourceMm);
}
// Register the table with DDL_NO_LOCK since we can't overwrite any existing partitions
x.getOutputs().add(new WriteEntity(table, WriteEntity.WriteType.DDL_NO_LOCK));
} else {
x.getLOG().debug("table " + tblDesc.getTableName() + " does not exist");
Task<?> t = createTableTask(tblDesc, x);
table = new Table(tblDesc.getDatabaseName(), tblDesc.getTableName());
Database parentDb = x.getHive().getDatabase(tblDesc.getDatabaseName());
// Since we are going to be creating a new table in a db, we should mark that db as a write entity
// so that the auth framework can go to work there.
x.getOutputs().add(new WriteEntity(parentDb, WriteEntity.WriteType.DDL_SHARED));
if (isPartitioned(tblDesc)) {
Task<?> ict = createImportCommitTask(tblDesc.getDatabaseName(), tblDesc.getTableName(), writeId, stmtId, AcidUtils.isInsertOnlyTable(tblDesc.getTblProps()));
for (AddPartitionDesc addPartitionDesc : partitionDescs) {
t.addDependentTask(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId, isSourceMm, ict));
}
} else {
x.getLOG().debug("adding dependent CopyWork/MoveWork for table");
if (tblDesc.isExternal() && (tblDesc.getLocation() == null)) {
x.getLOG().debug("Importing in place, no emptiness check, no copying/loading");
Path dataPath = new Path(fromURI.toString(), EximUtil.DATA_PATH_NAME);
tblDesc.setLocation(dataPath.toString());
} else {
Path tablePath = null;
if (tblDesc.getLocation() != null) {
tablePath = new Path(tblDesc.getLocation());
} else {
tablePath = wh.getDefaultTablePath(parentDb, tblDesc.getTableName());
}
FileSystem tgtFs = FileSystem.get(tablePath.toUri(), x.getConf());
checkTargetLocationEmpty(tgtFs, tablePath, replicationSpec, x.getLOG());
if (isSourceMm) {
// since the target table doesn't exist, it should inherit the source table's properties
Map<String, String> tblproperties = table.getParameters();
tblproperties.put("transactional", "true");
tblproperties.put("transactional_properties", "insert_only");
table.setParameters(tblproperties);
}
t.addDependentTask(loadTable(fromURI, table, false, tablePath, replicationSpec, x, writeId, stmtId, isSourceMm));
}
}
x.getTasks().add(t);
}
}
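The write-entity side of the regular import can be summarized on its own: an existing table is registered with DDL_NO_LOCK (nothing existing is overwritten), while a table being created registers its parent database with DDL_SHARED so the authorization framework sees the write. A minimal sketch under those assumptions (class and method names are illustrative):

import java.util.Set;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Table;

public class RegularImportOutputsSketch {
  // Mirror of the output registration above: existing table -> DDL_NO_LOCK on the
  // table, new table -> DDL_SHARED on the parent database.
  static void registerOutputs(Table existingTable, Database parentDb, Set<WriteEntity> outputs) {
    if (existingTable != null) {
      outputs.add(new WriteEntity(existingTable, WriteEntity.WriteType.DDL_NO_LOCK));
    } else {
      outputs.add(new WriteEntity(parentDb, WriteEntity.WriteType.DDL_SHARED));
    }
  }
}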
use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.
the class LoadSemanticAnalyzer method analyzeInternal.
@Override
public void analyzeInternal(ASTNode ast) throws SemanticException {
boolean isLocal = false;
boolean isOverWrite = false;
Tree fromTree = ast.getChild(0);
Tree tableTree = ast.getChild(1);
if (ast.getChildCount() == 4) {
isLocal = true;
isOverWrite = true;
}
if (ast.getChildCount() == 3) {
if (ast.getChild(2).getText().toLowerCase().equals("local")) {
isLocal = true;
} else {
isOverWrite = true;
}
}
// initialize load path
URI fromURI;
try {
String fromPath = stripQuotes(fromTree.getText());
fromURI = initializeFromURI(fromPath, isLocal);
} catch (IOException e) {
throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
} catch (URISyntaxException e) {
throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
}
// initialize destination table/partition
TableSpec ts = new TableSpec(db, conf, (ASTNode) tableTree);
if (ts.tableHandle.isView() || ts.tableHandle.isMaterializedView()) {
throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
}
if (ts.tableHandle.isNonNative()) {
throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
}
if (ts.tableHandle.isStoredAsSubDirectories()) {
throw new SemanticException(ErrorMsg.LOAD_INTO_STORED_AS_DIR.getMsg());
}
List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
if ((parts != null && parts.size() > 0) && (ts.partSpec == null || ts.partSpec.size() == 0)) {
throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg());
}
List<String> bucketCols = ts.tableHandle.getBucketCols();
if (bucketCols != null && !bucketCols.isEmpty()) {
String error = StrictChecks.checkBucketing(conf);
if (error != null) {
throw new SemanticException("Please load into an intermediate table" + " and use 'insert... select' to allow Hive to enforce bucketing. " + error);
}
}
// make sure the arguments make sense
List<FileStatus> files = applyConstraintsAndGetFiles(fromURI, fromTree, isLocal, ts.tableHandle);
// for managed tables, make sure the file formats match
if (TableType.MANAGED_TABLE.equals(ts.tableHandle.getTableType()) && conf.getBoolVar(HiveConf.ConfVars.HIVECHECKFILEFORMAT)) {
ensureFileFormatsMatch(ts, files, fromURI);
}
inputs.add(toReadEntity(new Path(fromURI)));
Task<? extends Serializable> rTask = null;
// create final load/move work
boolean preservePartitionSpecs = false;
Map<String, String> partSpec = ts.getPartSpec();
if (partSpec == null) {
partSpec = new LinkedHashMap<String, String>();
outputs.add(new WriteEntity(ts.tableHandle, (isOverWrite ? WriteEntity.WriteType.INSERT_OVERWRITE : WriteEntity.WriteType.INSERT)));
} else {
try {
Partition part = Hive.get().getPartition(ts.tableHandle, partSpec, false);
if (part != null) {
if (isOverWrite) {
outputs.add(new WriteEntity(part, WriteEntity.WriteType.INSERT_OVERWRITE));
} else {
outputs.add(new WriteEntity(part, WriteEntity.WriteType.INSERT));
// If partition already exists and we aren't overwriting it, then respect
// its current location info rather than picking it from the parent TableDesc
preservePartitionSpecs = true;
}
} else {
outputs.add(new WriteEntity(ts.tableHandle, (isOverWrite ? WriteEntity.WriteType.INSERT_OVERWRITE : WriteEntity.WriteType.INSERT)));
}
} catch (HiveException e) {
throw new SemanticException(e);
}
}
Long writeId = null;
int stmtId = -1;
if (AcidUtils.isTransactionalTable(ts.tableHandle)) {
try {
writeId = SessionState.get().getTxnMgr().getTableWriteId(ts.tableHandle.getDbName(), ts.tableHandle.getTableName());
} catch (LockException ex) {
throw new SemanticException("Failed to allocate the write id", ex);
}
stmtId = SessionState.get().getTxnMgr().getStmtIdAndIncrement();
}
// Note: this sets LoadFileType incorrectly for ACID; is that relevant for load?
// See setLoadFileType and setIsAcidIow calls elsewhere for an example.
LoadTableDesc loadTableWork = new LoadTableDesc(new Path(fromURI), Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING, writeId);
loadTableWork.setStmtId(stmtId);
if (preservePartitionSpecs) {
// Note: preservePartitionSpecs=true implies inheritTableSpecs=false, but
// preservePartitionSpecs=false (the default) here is not, by itself, enough
// information to set inheritTableSpecs=true
loadTableWork.setInheritTableSpecs(false);
}
Task<? extends Serializable> childTask = TaskFactory.get(new MoveWork(getInputs(), getOutputs(), loadTableWork, null, true, isLocal));
if (rTask != null) {
rTask.addDependentTask(childTask);
} else {
rTask = childTask;
}
rootTasks.add(rTask);
// The user asked for stats to be collected.
// Some stats, like the number of rows, require a scan of the data;
// however, other stats, like the number of files, do not require a complete scan.
// Update the stats which do not require a complete scan.
Task<? extends Serializable> statTask = null;
if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
BasicStatsWork basicStatsWork = new BasicStatsWork(loadTableWork);
basicStatsWork.setNoStatsAggregator(true);
basicStatsWork.setClearAggregatorStats(true);
StatsWork columnStatsWork = new StatsWork(ts.tableHandle, basicStatsWork, conf);
statTask = TaskFactory.get(columnStatsWork);
}
if (statTask != null) {
childTask.addDependentTask(statTask);
}
}
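The choice of write type in this analyzer is the part most relevant to WriteEntity: LOAD ... OVERWRITE maps to INSERT_OVERWRITE, a plain LOAD maps to INSERT, and the entity is the existing partition when there is one, otherwise the table. A minimal sketch of that decision (names are illustrative; the preservePartitionSpecs handling above is omitted):

import java.util.Set;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

public class LoadOutputsSketch {
  // LOAD DATA registers the partition when it already exists, otherwise the
  // table, and picks INSERT_OVERWRITE vs INSERT based on the OVERWRITE keyword.
  static void registerLoadTarget(Table table, Partition existingPartition,
      boolean isOverwrite, Set<WriteEntity> outputs) {
    WriteEntity.WriteType type =
        isOverwrite ? WriteEntity.WriteType.INSERT_OVERWRITE : WriteEntity.WriteType.INSERT;
    if (existingPartition != null) {
      outputs.add(new WriteEntity(existingPartition, type));
    } else {
      outputs.add(new WriteEntity(table, type));
    }
  }
}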
use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.
the class TestDbTxnManager method testDDLExclusive.
@Test
public void testDDLExclusive() throws Exception {
WriteEntity we = addTableOutput(WriteEntity.WriteType.DDL_EXCLUSIVE);
QueryPlan qp = new MockQueryPlan(this, HiveOperation.DROPTABLE);
txnMgr.openTxn(ctx, "fred");
txnMgr.acquireLocks(qp, ctx, "fred");
List<HiveLock> locks = ctx.getHiveLocks();
Assert.assertEquals(1, locks.size());
Assert.assertEquals(1, TxnDbUtil.countLockComponents(conf, ((DbLockManager.DbHiveLock) locks.get(0)).lockId));
txnMgr.rollbackTxn();
locks = txnMgr.getLockManager().getLocks(false, false);
Assert.assertEquals(0, locks.size());
}
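For comparison, the same harness could exercise a shared DDL lock; the sketch below assumes the addTableOutput and MockQueryPlan helpers from this test class and uses an ALTER TABLE operation, with illustrative assertions rather than verified expected values:

@Test
public void testDDLSharedSketch() throws Exception {
  addTableOutput(WriteEntity.WriteType.DDL_SHARED);
  QueryPlan qp = new MockQueryPlan(this, HiveOperation.ALTERTABLE_ADDCOLS);
  txnMgr.openTxn(ctx, "fred");
  txnMgr.acquireLocks(qp, ctx, "fred");
  List<HiveLock> locks = ctx.getHiveLocks();
  // A shared DDL write is expected to take a single (shared) lock.
  Assert.assertEquals(1, locks.size());
  txnMgr.commitTxn();
  locks = txnMgr.getLockManager().getLocks(false, false);
  Assert.assertEquals(0, locks.size());
}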
use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.
the class TestDbTxnManager method setUp.
@Before
public void setUp() throws Exception {
TxnDbUtil.prepDb(conf);
txnMgr = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);
// init lock manager
txnMgr.getLockManager();
Assert.assertTrue(txnMgr instanceof DbTxnManager);
nextInput = 1;
readEntities = new HashSet<ReadEntity>();
writeEntities = new HashSet<WriteEntity>();
conf.setTimeVar(HiveConf.ConfVars.HIVE_TIMEDOUT_TXN_REAPER_START, 0, TimeUnit.SECONDS);
conf.setTimeVar(HiveConf.ConfVars.HIVE_TXN_TIMEOUT, 10, TimeUnit.SECONDS);
houseKeeperService = new AcidHouseKeeperService();
houseKeeperService.setConf(conf);
}
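The addTableOutput helper used in testDDLExclusive is private to this test class and not shown on this page; a plausible reconstruction, assuming it simply wraps a uniquely named Table in a WriteEntity and records it in the writeEntities set initialized above:

private WriteEntity addTableOutput(WriteEntity.WriteType writeType) {
  // Hypothetical sketch: wrap a fresh table in a WriteEntity with the requested
  // write type and remember it so the mock query plan can report it as an output.
  Table t = new Table("default", "table" + nextInput++);
  WriteEntity we = new WriteEntity(t, writeType);
  writeEntities.add(we);
  return we;
}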