Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.
The class ImportSemanticAnalyzer, method createReplImportTasks.
/**
 * Create tasks for repl import
 */
private static void createReplImportTasks(
    ImportTableDesc tblDesc, List<AddPartitionDesc> partitionDescs,
    boolean isPartSpecSet, ReplicationSpec replicationSpec, boolean waitOnPrecursor,
    Table table, URI fromURI, FileSystem fs, Warehouse wh,
    EximUtil.SemanticAnalyzerWrapperContext x)
    throws HiveException, URISyntaxException, IOException, MetaException {
  Task dr = null;
  WriteEntity.WriteType lockType = WriteEntity.WriteType.DDL_NO_LOCK;
  if ((table != null) && (isPartitioned(tblDesc) != table.isPartitioned())) {
    // drop and re-create.
    if (replicationSpec.allowReplacementInto(table)) {
      dr = dropTableTask(table, x);
      lockType = WriteEntity.WriteType.DDL_EXCLUSIVE;
      // null it out so we go into the table re-create flow.
      table = null;
    } else {
      // noop out of here.
      return;
    }
  }
  // Normally, on import, trying to create a table or a partition in a db that does not yet exist
  // is an error condition. However, in the case of a REPL LOAD, it is possible that we are trying
  // to create tasks to create a table inside a db that as-of-now does not exist, but there is
  // a precursor Task waiting that will create it before this is encountered. Thus, we instantiate
  // defaults and do not error out in that case.
  Database parentDb = x.getHive().getDatabase(tblDesc.getDatabaseName());
  if (parentDb == null) {
    if (!waitOnPrecursor) {
      throw new SemanticException(ErrorMsg.DATABASE_NOT_EXISTS.getMsg(tblDesc.getDatabaseName()));
    }
  }
  if (tblDesc.getLocation() == null) {
    if (!waitOnPrecursor) {
      tblDesc.setLocation(wh.getTablePath(parentDb, tblDesc.getTableName()).toString());
    } else {
      tblDesc.setLocation(wh.getDnsPath(new Path(
          wh.getDefaultDatabasePath(tblDesc.getDatabaseName()),
          MetaStoreUtils.encodeTableName(tblDesc.getTableName().toLowerCase()))).toString());
    }
  }
  if (table == null) {
    if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
      lockType = WriteEntity.WriteType.DDL_SHARED;
    }
    Task t = createTableTask(tblDesc, x);
    table = new Table(tblDesc.getDatabaseName(), tblDesc.getTableName());
    if (!replicationSpec.isMetadataOnly()) {
      if (isPartitioned(tblDesc)) {
        for (AddPartitionDesc addPartitionDesc : partitionDescs) {
          addPartitionDesc.setReplicationSpec(replicationSpec);
          t.addDependentTask(
              addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
        }
      } else {
        x.getLOG().debug("adding dependent CopyWork/MoveWork for table");
        t.addDependentTask(
            loadTable(fromURI, table, true, new Path(tblDesc.getLocation()), replicationSpec, x));
      }
    }
    if (dr == null) {
      // Simply create
      x.getTasks().add(t);
    } else {
      // Drop and recreate
      dr.addDependentTask(t);
      x.getTasks().add(dr);
    }
  } else {
    // Table existed, and is okay to replicate into, not dropping and re-creating.
    if (table.isPartitioned()) {
      x.getLOG().debug("table partitioned");
      for (AddPartitionDesc addPartitionDesc : partitionDescs) {
        addPartitionDesc.setReplicationSpec(replicationSpec);
        Map<String, String> partSpec = addPartitionDesc.getPartition(0).getPartSpec();
        org.apache.hadoop.hive.ql.metadata.Partition ptn = null;
        if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) {
          if (!replicationSpec.isMetadataOnly()) {
            x.getTasks().add(
                addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
          }
        } else {
          // Replace only if the destination ptn's repl.last.id is older than the replacement's.
          if (replicationSpec.allowReplacementInto(ptn)) {
            if (!replicationSpec.isMetadataOnly()) {
              x.getTasks().add(
                  addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
            } else {
              x.getTasks().add(
                  alterSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, ptn, x));
            }
            if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
              lockType = WriteEntity.WriteType.DDL_SHARED;
            }
          } else {
            // ignore this ptn, do nothing, not an error.
          }
        }
      }
      if (replicationSpec.isMetadataOnly() && partitionDescs.isEmpty()) {
        // MD-ONLY table alter
        x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec));
        if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
          lockType = WriteEntity.WriteType.DDL_SHARED;
        }
      }
    } else {
      x.getLOG().debug("table non-partitioned");
      if (!replicationSpec.allowReplacementInto(table)) {
        // silently return, table is newer than our replacement.
        return;
      }
      if (!replicationSpec.isMetadataOnly()) {
        // repl-imports are replace-into unless the event is insert-into
        loadTable(fromURI, table, !replicationSpec.isInsert(), new Path(fromURI), replicationSpec, x);
      } else {
        x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec));
      }
      if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
        lockType = WriteEntity.WriteType.DDL_SHARED;
      }
    }
  }
  x.getOutputs().add(new WriteEntity(table, lockType));
}
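The WriteEntity work in this method is concentrated in its last line: whichever create, alter, or drop tasks were queued, the target table is registered as an output with the lock type accumulated along the way. The following is a minimal sketch of that bookkeeping, not Hive code; the class and helper names are made up, and only the WriteEntity constructor and WriteType values come from the snippet above.

import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Table;

// Hypothetical condensation of the lock-type escalation in createReplImportTasks:
// DDL_EXCLUSIVE when the table is dropped and re-created, DDL_SHARED when it is
// created or altered, DDL_NO_LOCK when nothing changes.
public class ReplLockTypeSketch {
  static WriteEntity replOutput(Table table, boolean droppedAndRecreated, boolean createdOrAltered) {
    WriteEntity.WriteType lockType = WriteEntity.WriteType.DDL_NO_LOCK;
    if (droppedAndRecreated) {
      lockType = WriteEntity.WriteType.DDL_EXCLUSIVE;
    } else if (createdOrAltered) {
      lockType = WriteEntity.WriteType.DDL_SHARED;
    }
    // Mirrors the final x.getOutputs().add(new WriteEntity(table, lockType)) call above.
    return new WriteEntity(table, lockType);
  }
}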
Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.
The class ProcessAnalyzeTable, method genTableStats.
private Task<?> genTableStats(GenTezProcContext context, TableScanOperator tableScan) throws HiveException {
  Class<? extends InputFormat> inputFormat = tableScan.getConf().getTableMetadata().getInputFormatClass();
  ParseContext parseContext = context.parseContext;
  Table table = tableScan.getConf().getTableMetadata();
  List<Partition> partitions = new ArrayList<>();
  if (table.isPartitioned()) {
    partitions.addAll(parseContext.getPrunedPartitions(tableScan).getPartitions());
    for (Partition partn : partitions) {
      LOG.debug("XXX: adding part: " + partn);
      context.outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
    }
  }
  TableSpec tableSpec = new TableSpec(table, partitions);
  tableScan.getConf().getTableMetadata().setTableSpec(tableSpec);
  if (inputFormat.equals(OrcInputFormat.class)) {
    // For ORC, there is no Tez Job for table stats.
    StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata().getTableSpec());
    snjWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
    // If partition is specified, get pruned partition list
    if (partitions.size() > 0) {
      snjWork.setPrunedPartitionList(parseContext.getPrunedPartitions(tableScan));
    }
    return TaskFactory.get(snjWork, parseContext.getConf());
  } else {
    StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
    statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
    statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir());
    statsWork.setSourceTask(context.currentTask);
    statsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
    return TaskFactory.get(statsWork, parseContext.getConf());
  }
}
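For the ORC and non-ORC paths alike, the WriteEntity usage here is the same: every pruned partition is registered as a DDL_NO_LOCK output before the stats task is built. A small self-contained sketch of that registration step follows; the class and method names are invented for illustration, and only the WriteEntity constructor and WriteType are taken from the code above.

import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Partition;

// Hypothetical helper mirroring the partition loop in genTableStats: stats gathering
// updates metadata only, so each partition is recorded with DDL_NO_LOCK.
public class StatsOutputsSketch {
  static Set<WriteEntity> partitionOutputs(List<Partition> prunedPartitions) {
    Set<WriteEntity> outputs = new LinkedHashSet<>();
    for (Partition partn : prunedPartitions) {
      outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
    }
    return outputs;
  }
}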
Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.
The class TestDbTxnManager, method addPartitionOutput.
private WriteEntity addPartitionOutput(Table t, WriteEntity.WriteType writeType) throws Exception {
  Map<String, String> partSpec = new HashMap<String, String>();
  partSpec.put("version", Integer.toString(nextInput++));
  Partition p = new Partition(t, partSpec, new Path("/dev/null"));
  WriteEntity we = new WriteEntity(p, writeType);
  writeEntities.add(we);
  return we;
}
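A brief usage sketch for the helper above; partitionedTable stands in for whatever Table the test builds (it would need to be partitioned on "version" for the partSpec to make sense), and the call site itself is an assumption rather than code from the test.

// Hypothetical call site inside the same test class.
WriteEntity insertOutput = addPartitionOutput(partitionedTable, WriteEntity.WriteType.INSERT);
// The entity captures both what is written (the partition) and how (the WriteType),
// which is what the transaction/lock manager later inspects.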
Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.
The class TestDbTxnManager, method addDynamicPartitionedOutput.
private WriteEntity addDynamicPartitionedOutput(Table t, WriteEntity.WriteType writeType) throws Exception {
  DummyPartition dp = new DummyPartition(t, "no clue what I should call this");
  WriteEntity we = new WriteEntity(dp, writeType, false);
  writeEntities.add(we);
  return we;
}
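Compared with addPartitionOutput, this helper models a dynamic-partition write: the concrete partition is not known yet, so a DummyPartition placeholder is used and the three-argument WriteEntity constructor is passed false, which appears to mark the entity as not yet complete. A hedged usage sketch, with partitionedTable again standing in for the test's Table:

// Hypothetical contrast between a static and a dynamic partition output.
WriteEntity staticOutput  = addPartitionOutput(partitionedTable, WriteEntity.WriteType.INSERT);
WriteEntity dynamicOutput = addDynamicPartitionedOutput(partitionedTable, WriteEntity.WriteType.INSERT);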
Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project incubator-atlas by apache.
The class HiveHook, method addOutputs.
private static void addOutputs(HiveMetaStoreBridge hiveBridge, HiveOperation op,
    SortedSet<WriteEntity> sortedOutputs, StringBuilder buffer,
    final Map<WriteEntity, Referenceable> refs, final boolean ignoreHDFSPathsInQFName) throws HiveException {
  if (refs != null) {
    Set<String> dataSetsProcessed = new LinkedHashSet<>();
    if (sortedOutputs != null) {
      for (WriteEntity output : sortedOutputs) {
        final Entity entity = output;
        if (!dataSetsProcessed.contains(output.getName().toLowerCase())) {
          // HiveOperation.QUERY type encompasses INSERT, INSERT_OVERWRITE, UPDATE, DELETE, PATH_WRITE operations
          if (addQueryType(op, (WriteEntity) entity)) {
            buffer.append(SEP);
            buffer.append(((WriteEntity) entity).getWriteType().name());
          }
          if (ignoreHDFSPathsInQFName
              && (Type.DFS_DIR.equals(output.getType()) || Type.LOCAL_DIR.equals(output.getType()))) {
            LOG.debug("Skipping dfs dir output addition to process qualified name {} ", output.getName());
          } else if (refs.containsKey(output)) {
            if (output.getType() == Type.PARTITION || output.getType() == Type.TABLE) {
              final Date createTime = HiveMetaStoreBridge.getTableCreatedTime(
                  hiveBridge.hiveClient.getTable(output.getTable().getDbName(), output.getTable().getTableName()));
              addDataset(buffer, refs.get(output), createTime.getTime());
            } else {
              addDataset(buffer, refs.get(output));
            }
          }
          dataSetsProcessed.add(output.getName().toLowerCase());
        }
      }
    }
  }
}
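The qualified-name logic above boils down to two WriteEntity checks: skip plain directory outputs when ignoreHDFSPathsInQFName is set, and append the write type for query-style operations. Below is a small sketch of the first check, with an invented class and method name; the Entity.Type values and the getType() accessor are the ones used in the snippet.

import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

// Hypothetical filter distilled from addOutputs: raw HDFS or local directory writes are
// left out of the process qualified name when ignoreHDFSPathsInQFName is set.
public class QualifiedNameFilterSketch {
  static boolean includeInQualifiedName(WriteEntity output, boolean ignoreHDFSPathsInQFName) {
    boolean isPathOutput = Entity.Type.DFS_DIR.equals(output.getType())
        || Entity.Type.LOCAL_DIR.equals(output.getType());
    return !(ignoreHDFSPathsInQFName && isPathOutput);
  }
}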