Use of org.apache.hadoop.hive.ql.plan.ReplTxnWork in project hive by apache.
The class OpenTxnHandler, method handle.
@Override
public List<Task<?>> handle(Context context) throws SemanticException {
  if (!AcidUtils.isAcidEnabled(context.hiveConf)) {
    context.log.error("Cannot load transaction events as acid is not enabled");
    throw new SemanticException("Cannot load transaction events as acid is not enabled");
  }
  OpenTxnMessage msg = deserializer.getOpenTxnMessage(context.dmd.getPayload());
  Task<ReplTxnWork> openTxnTask = TaskFactory.get(
      new ReplTxnWork(HiveUtils.getReplPolicy(context.dbName), context.dbName, null, msg.getTxnIds(),
          ReplTxnWork.OperationType.REPL_OPEN_TXN, context.eventOnlyReplicationSpec(),
          context.getDumpDirectory(), context.getMetricCollector()),
      context.hiveConf);
  // Either way, if this event gets executed again, it is taken care of.
  if (!context.isDbNameEmpty()) {
    updatedMetadata.set(context.dmd.getEventTo().toString(), context.dbName, null, null);
  }
  context.log.debug("Added Open txn task : {}", openTxnTask.getId());
  return Collections.singletonList(openTxnTask);
}
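The handler above follows a common shape in the replication load path: guard on ACID support, deserialize the event payload, wrap it in a ReplTxnWork, and hand that to TaskFactory. Below is a minimal, self-contained sketch of that control flow; MiniContext, MiniWork, and MiniTask are hypothetical stand-ins, not Hive classes, and the replication-policy string is a made-up placeholder.

// A minimal, self-contained sketch of the guard-then-build-task flow above.
// MiniContext, MiniWork, and MiniTask are hypothetical stand-ins, not Hive
// classes, and the replication-policy string is a made-up placeholder.
import java.util.Collections;
import java.util.List;

public class OpenTxnSketch {
  enum OperationType { REPL_OPEN_TXN }

  record MiniWork(String replPolicy, String dbName, List<Long> txnIds, OperationType op) { }
  record MiniTask(long id, MiniWork work) { }
  record MiniContext(boolean acidEnabled, String dbName, List<Long> txnIds) { }

  static List<MiniTask> handle(MiniContext ctx) {
    if (!ctx.acidEnabled()) {
      // Mirrors the handler's guard: loading txn events requires ACID on the target.
      throw new IllegalStateException("Cannot load transaction events as acid is not enabled");
    }
    // Build one unit of work per event and wrap it in a single task.
    MiniWork work = new MiniWork(ctx.dbName() + ".*", ctx.dbName(), ctx.txnIds(),
        OperationType.REPL_OPEN_TXN);
    return Collections.singletonList(new MiniTask(1L, work));
  }

  public static void main(String[] args) {
    System.out.println(handle(new MiniContext(true, "sales", List.of(42L, 43L))));
  }
}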
Use of org.apache.hadoop.hive.ql.plan.ReplTxnWork in project hive by apache.
The class AllocWriteIdHandler, method handle.
@Override
public List<Task<?>> handle(Context context) throws SemanticException {
  if (!AcidUtils.isAcidEnabled(context.hiveConf)) {
    context.log.error("Cannot load alloc write id event as acid is not enabled");
    throw new SemanticException("Cannot load alloc write id event as acid is not enabled");
  }
  AllocWriteIdMessage msg = deserializer.getAllocWriteIdMessage(context.dmd.getPayload());
  String dbName = (context.dbName != null && !context.dbName.isEmpty() ? context.dbName : msg.getDB());
  // We need the table name for alloc write id, and that is received from the source.
  String tableName = msg.getTableName();
  // The repl policy should be created based on the db name in the context.
  ReplTxnWork work = new ReplTxnWork(HiveUtils.getReplPolicy(context.dbName), dbName, tableName,
      ReplTxnWork.OperationType.REPL_ALLOC_WRITE_ID, msg.getTxnToWriteIdList(),
      context.eventOnlyReplicationSpec(), context.getDumpDirectory(), context.getMetricCollector());
  Task<?> allocWriteIdTask = TaskFactory.get(work, context.hiveConf);
  context.log.info("Added alloc write id task : {}", allocWriteIdTask.getId());
  updatedMetadata.set(context.dmd.getEventTo().toString(), dbName, tableName, null);
  return Collections.singletonList(allocWriteIdTask);
}
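One detail worth calling out is the db-name fallback: for a database-level policy the context carries the target db name, while for a warehouse-level dump the handler falls back to the db name embedded in the event itself. A small sketch of that selection logic follows; targetDb is a hypothetical helper and warehouse_db is a made-up database name.

// Sketch of the db-name fallback above, with illustrative names only.
import java.util.Map;

public class AllocWriteIdSketch {
  // Prefer the db name configured for this load (db-level policy); fall back to
  // the db name carried in the event (warehouse-level dump).
  static String targetDb(String contextDbName, String msgDbName) {
    return (contextDbName != null && !contextDbName.isEmpty()) ? contextDbName : msgDbName;
  }

  public static void main(String[] args) {
    // Stand-in for msg.getTxnToWriteIdList(): source txn id -> allocated write id.
    Map<Long, Long> txnToWriteId = Map.of(101L, 1L, 102L, 2L);
    System.out.println(targetDb("", "warehouse_db"));      // warehouse-level: warehouse_db
    System.out.println(targetDb("sales", "warehouse_db")); // db-level: sales
    System.out.println(txnToWriteId);
  }
}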
Use of org.apache.hadoop.hive.ql.plan.ReplTxnWork in project hive by apache.
The class CommitTxnHandler, method handle.
@Override
public List<Task<?>> handle(Context context) throws SemanticException {
  if (!AcidUtils.isAcidEnabled(context.hiveConf)) {
    context.log.error("Cannot load transaction events as acid is not enabled");
    throw new SemanticException("Cannot load transaction events as acid is not enabled");
  }
  CommitTxnMessage msg = deserializer.getCommitTxnMessage(context.dmd.getPayload());
  int numEntry = (msg.getTables() == null ? 0 : msg.getTables().size());
  List<Task<?>> tasks = new ArrayList<>();
  String dbName = context.dbName;
  String tableNamePrev = null;
  String tblName = null;
  ReplTxnWork work = new ReplTxnWork(HiveUtils.getReplPolicy(context.dbName), context.dbName, null,
      msg.getTxnId(), ReplTxnWork.OperationType.REPL_COMMIT_TXN, context.eventOnlyReplicationSpec(),
      context.getDumpDirectory(), context.getMetricCollector());
  if (numEntry > 0) {
    context.log.debug("Commit txn handler for txnid " + msg.getTxnId() + " databases : " + msg.getDatabases()
        + " tables : " + msg.getTables() + " partitions : " + msg.getPartitions()
        + " files : " + msg.getFilesList() + " write ids : " + msg.getWriteIds());
  }
  for (int idx = 0; idx < numEntry; idx++) {
    String actualTblName = msg.getTables().get(idx);
    String actualDBName = msg.getDatabases().get(idx);
    String completeName = Table.getCompleteName(actualDBName, actualTblName);
    // Write events for the same table are grouped together in the commit txn message.
    if (tableNamePrev == null || !(completeName.equals(tableNamePrev))) {
      // The data location is created by the source, so the location should be
      // formed based on the table name in the message.
      Path location = HiveUtils.getDumpPath(new Path(context.location), actualDBName, actualTblName);
      tblName = actualTblName;
      // For a warehouse-level dump, use the db name from the write event.
      dbName = (context.isDbNameEmpty() ? actualDBName : context.dbName);
      Context currentContext = new Context(context, dbName, context.getDumpDirectory(),
          context.getMetricCollector());
      currentContext.setLocation(location.toUri().toString());
      // Piggybacking on the Import logic for now.
      TableHandler tableHandler = new TableHandler();
      tasks.addAll((tableHandler.handle(currentContext)));
      readEntitySet.addAll(tableHandler.readEntities());
      writeEntitySet.addAll(tableHandler.writeEntities());
      getUpdatedMetadata().copyUpdatedMetadata(tableHandler.getUpdatedMetadata());
      tableNamePrev = completeName;
    }
    try {
      WriteEventInfo writeEventInfo = new WriteEventInfo(msg.getWriteIds().get(idx), dbName, tblName,
          msg.getFiles(idx));
      if (msg.getPartitions().get(idx) != null && !msg.getPartitions().get(idx).isEmpty()) {
        writeEventInfo.setPartition(msg.getPartitions().get(idx));
      }
      work.addWriteEventInfo(writeEventInfo);
    } catch (Exception e) {
      throw new SemanticException("Failed to extract write event info from commit txn message : " + e.getMessage());
    }
  }
  Task<ReplTxnWork> commitTxnTask = TaskFactory.get(work, context.hiveConf);
  // Either way, if this event gets executed again, it is taken care of.
  if (!context.isDbNameEmpty()) {
    updatedMetadata.set(context.dmd.getEventTo().toString(), context.dbName, null, null);
  }
  context.log.debug("Added Commit txn task : {}", commitTxnTask.getId());
  if (tasks.isEmpty()) {
    // The commit txn task will be used for setting the last repl id.
    return Collections.singletonList(commitTxnTask);
  }
  DAGTraversal.traverse(tasks, new AddDependencyToLeaves(commitTxnTask));
  return tasks;
}
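The loop relies on per-table entries arriving adjacent to each other in the commit txn message, so a single tableNamePrev variable is enough to detect table boundaries: import tasks are created once per table group, while a WriteEventInfo is added for every entry, and DAGTraversal finally attaches the commit task after the leaves of the generated task DAG so the commit runs last. A minimal sketch of the grouping idea; the table names below are illustrative only.

// Sketch of the consecutive-grouping idea above: one "previous name" variable
// marks table boundaries. All names are illustrative.
import java.util.List;

public class GroupingSketch {
  public static void main(String[] args) {
    List<String> completeNames = List.of("db1.t1", "db1.t1", "db1.t2", "db2.t3");
    String tableNamePrev = null;
    for (String completeName : completeNames) {
      if (tableNamePrev == null || !completeName.equals(tableNamePrev)) {
        // The real handler builds the per-table import tasks here, once per group.
        System.out.println("new table group: " + completeName);
        tableNamePrev = completeName;
      }
      // Every entry, grouped or not, contributes one write event to the work.
      System.out.println("  add write event for " + completeName);
    }
  }
}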
Use of org.apache.hadoop.hive.ql.plan.ReplTxnWork in project hive by apache.
The class AbortTxnHandler, method handle.
@Override
public List<Task<?>> handle(Context context) throws SemanticException {
  if (!AcidUtils.isAcidEnabled(context.hiveConf)) {
    context.log.error("Cannot load transaction events as acid is not enabled");
    throw new SemanticException("Cannot load transaction events as acid is not enabled");
  }
  AbortTxnMessage msg = deserializer.getAbortTxnMessage(context.dmd.getPayload());
  Task<ReplTxnWork> abortTxnTask = TaskFactory.get(
      new ReplTxnWork(HiveUtils.getReplPolicy(context.dbName), context.dbName, null, msg.getTxnId(),
          ReplTxnWork.OperationType.REPL_ABORT_TXN, context.eventOnlyReplicationSpec(),
          context.getDumpDirectory(), context.getMetricCollector()),
      context.hiveConf);
  // Either way, if this event gets executed again, it is taken care of.
  if (!context.isDbNameEmpty()) {
    updatedMetadata.set(context.dmd.getEventTo().toString(), context.dbName, null, null);
  }
  context.log.debug("Added Abort txn task : {}", abortTxnTask.getId());
  return Collections.singletonList(abortTxnTask);
}
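The recurring comment about re-execution being "taken care of" points at replay safety: updatedMetadata records the event id up to which the load has progressed, which is one way a re-delivered event can be recognized and skipped. A hypothetical sketch of that idea follows; LastEventTracker is not a Hive class, it only illustrates monotonic event-id tracking.

// Hypothetical sketch of replay safety: tracking the highest applied event id
// makes a re-delivered event detectable. Not a Hive class.
import java.util.concurrent.atomic.AtomicLong;

public class LastEventTracker {
  private final AtomicLong lastAppliedEventId = new AtomicLong(-1L);

  // True if the event has not been applied yet.
  boolean shouldApply(long eventId) {
    return eventId > lastAppliedEventId.get();
  }

  void markApplied(long eventId) {
    lastAppliedEventId.accumulateAndGet(eventId, Math::max);
  }

  public static void main(String[] args) {
    LastEventTracker tracker = new LastEventTracker();
    System.out.println(tracker.shouldApply(10L)); // true: first delivery
    tracker.markApplied(10L);
    System.out.println(tracker.shouldApply(10L)); // false: replay is a no-op
  }
}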
Use of org.apache.hadoop.hive.ql.plan.ReplTxnWork in project hive by apache.
The class LoadTable, method newTableTasks.
private void newTableTasks(ImportTableDesc tblDesc, Task<?> tblRootTask, TableLocationTuple tuple)
    throws Exception {
  Table table = tblDesc.toTable(context.hiveConf);
  ReplicationSpec replicationSpec = event.replicationSpec();
  if (!tblDesc.isExternal()) {
    tblDesc.setLocation(null);
  }
  Task<?> createTableTask = tblDesc.getCreateTableTask(new HashSet<>(), new HashSet<>(), context.hiveConf,
      true, (new Path(context.dumpDirectory)).getParent().toString(), metricCollector, true);
  if (tblRootTask == null) {
    tblRootTask = createTableTask;
  } else {
    tblRootTask.addDependentTask(createTableTask);
  }
  if (replicationSpec.isMetadataOnly()) {
    tracker.addTask(tblRootTask);
    return;
  }
  Task<?> parentTask = createTableTask;
  if (replicationSpec.isTransactionalTableDump()) {
    List<String> partNames = isPartitioned(tblDesc) ? event.partitions(tblDesc) : null;
    ReplTxnWork replTxnWork = new ReplTxnWork(tblDesc.getDatabaseName(), tblDesc.getTableName(), partNames,
        replicationSpec.getValidWriteIdList(), ReplTxnWork.OperationType.REPL_WRITEID_STATE,
        (new Path(context.dumpDirectory)).getParent().toString(), metricCollector);
    Task<?> replTxnTask = TaskFactory.get(replTxnWork, context.hiveConf);
    parentTask.addDependentTask(replTxnTask);
    parentTask = replTxnTask;
  }
  boolean shouldCreateLoadTableTask =
      (!isPartitioned(tblDesc) && !TableType.EXTERNAL_TABLE.equals(table.getTableType()))
      || tuple.isConvertedFromManagedToExternal;
  if (shouldCreateLoadTableTask) {
    LOG.debug("adding dependent ReplTxnTask/CopyWork/MoveWork for table");
    Task<?> loadTableTask = loadTableTask(table, replicationSpec, table.getDataLocation(), event.dataPath());
    parentTask.addDependentTask(loadTableTask);
  }
  tracker.addTask(tblRootTask);
}
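The method builds a small dependency chain: the create-table task runs first; for a transactional table dump, a ReplTxnWork task with OperationType.REPL_WRITEID_STATE is chained after it so that the valid write-id list is installed before any data lands; and the load-table task, when needed, hangs off whichever task is currently the parent. A minimal sketch of that chaining pattern; SimpleTask is a hypothetical stand-in for Hive's Task<?>.

// Sketch of the dependency chain built above: create table, then (for a
// transactional table dump) install write-id state, then load the data.
import java.util.ArrayList;
import java.util.List;

public class TaskChainSketch {
  static class SimpleTask {
    final String name;
    final List<SimpleTask> dependents = new ArrayList<>();
    SimpleTask(String name) { this.name = name; }
    void addDependentTask(SimpleTask t) { dependents.add(t); }
  }

  public static void main(String[] args) {
    SimpleTask createTable = new SimpleTask("createTable");
    SimpleTask parent = createTable;
    boolean transactionalTableDump = true;
    if (transactionalTableDump) {
      // Write-id state must be in place before any data is moved in.
      SimpleTask replWriteIdState = new SimpleTask("replWriteIdState");
      parent.addDependentTask(replWriteIdState);
      parent = replWriteIdState;
    }
    parent.addDependentTask(new SimpleTask("loadTable"));
    // Walk the chain to show execution order: createTable, replWriteIdState, loadTable.
    for (SimpleTask t = createTable; t != null; t = t.dependents.isEmpty() ? null : t.dependents.get(0)) {
      System.out.println(t.name);
    }
  }
}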