Use of org.apache.hadoop.hive.ql.parse.repl.load.MetaData in project hive by apache: class ImportSemanticAnalyzer, method prepareImport.
/**
 * The same code is used by both "repl load" and "import".
 * "repl load" now supports two modes, "repl load dbName [location]" and
 * "repl load [location]"; in the latter case the database name is taken from the table
 * metadata by default and then overridden if one is specified on the command line.
 *
 * Hence, for import to work correctly, we have to pass in the sessionState default Db via the
 * overrideDBName parameter.
 */
public static boolean prepareImport(boolean isImportCmd, boolean isLocationSet, boolean isExternalSet,
    boolean isPartSpecSet, boolean waitOnPrecursor, String parsedLocation, String parsedTableName,
    String overrideDBName, LinkedHashMap<String, String> parsedPartSpec, String fromLocn,
    EximUtil.SemanticAnalyzerWrapperContext x, UpdatedMetaDataTracker updatedMetadata)
    throws IOException, MetaException, HiveException, URISyntaxException {
  // initialize load path
  URI fromURI = EximUtil.getValidatedURI(x.getConf(), stripQuotes(fromLocn));
  Path fromPath = new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI.getPath());
  FileSystem fs = FileSystem.get(fromURI, x.getConf());
  x.getInputs().add(toReadEntity(fromPath, x.getConf()));
  MetaData rv;
  try {
    rv = EximUtil.readMetaData(fs, new Path(fromPath, EximUtil.METADATA_NAME));
  } catch (IOException e) {
    throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
  }
  if (rv.getTable() == null) {
    // nothing to do here, silently return.
    return false;
  }
  ReplicationSpec replicationSpec = rv.getReplicationSpec();
  if (replicationSpec.isNoop()) {
    // nothing to do here, silently return.
    x.getLOG().debug("Current update with ID:{} is noop", replicationSpec.getCurrentReplicationState());
    return false;
  }
  if (isImportCmd) {
    replicationSpec.setReplSpecType(ReplicationSpec.Type.IMPORT);
  }
  String dbname = rv.getTable().getDbName();
  if ((overrideDBName != null) && (!overrideDBName.isEmpty())) {
    // If the parsed statement contained a db.tablename specification, prefer that.
    dbname = overrideDBName;
  }
  // Create table associated with the import
  // Executed if relevant, and used to contain all the other details about the table if not.
  ImportTableDesc tblDesc;
  try {
    tblDesc = getBaseCreateTableDescFromTable(dbname, rv.getTable());
  } catch (Exception e) {
    throw new HiveException(e);
  }
  boolean isSourceMm = AcidUtils.isInsertOnlyTable(tblDesc.getTblProps());
  if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
    tblDesc.setReplicationSpec(replicationSpec);
    StatsSetupConst.setBasicStatsState(tblDesc.getTblProps(), StatsSetupConst.FALSE);
  }
  if (isExternalSet) {
    if (isSourceMm) {
      throw new SemanticException("Cannot import an MM table as external");
    }
    tblDesc.setExternal(isExternalSet);
    // This condition-check could have been avoided, but to honour the old
    // default of not calling if it wasn't set, we retain that behaviour.
    // TODO:cleanup after verification that the outer if isn't really needed here
  }
  if (isLocationSet) {
    tblDesc.setLocation(parsedLocation);
    x.getInputs().add(toReadEntity(new Path(parsedLocation), x.getConf()));
  }
  if ((parsedTableName != null) && (!parsedTableName.isEmpty())) {
    tblDesc.setTableName(parsedTableName);
  }
  List<AddPartitionDesc> partitionDescs = new ArrayList<AddPartitionDesc>();
  Iterable<Partition> partitions = rv.getPartitions();
  for (Partition partition : partitions) {
    // TODO: this should ideally not create AddPartitionDesc per partition
    AddPartitionDesc partsDesc = getBaseAddPartitionDescFromPartition(fromPath, dbname, tblDesc, partition);
    if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
      StatsSetupConst.setBasicStatsState(partsDesc.getPartition(0).getPartParams(), StatsSetupConst.FALSE);
    }
    partitionDescs.add(partsDesc);
  }
  if (isPartSpecSet) {
    // The import specification asked for only a particular partition to be loaded
    // We load only that, and ignore all the others.
    boolean found = false;
    for (Iterator<AddPartitionDesc> partnIter = partitionDescs.listIterator(); partnIter.hasNext(); ) {
      AddPartitionDesc addPartitionDesc = partnIter.next();
      if (!found && addPartitionDesc.getPartition(0).getPartSpec().equals(parsedPartSpec)) {
        found = true;
      } else {
        partnIter.remove();
      }
    }
    if (!found) {
      throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(" - Specified partition not found in import directory"));
    }
  }
  if (tblDesc.getTableName() == null) {
    // Either we got the tablename from the IMPORT statement (first priority)
    // or from the export dump.
    throw new SemanticException(ErrorMsg.NEED_TABLE_SPECIFICATION.getMsg());
  } else {
    x.getConf().set("import.destination.table", tblDesc.getTableName());
    for (AddPartitionDesc addPartitionDesc : partitionDescs) {
      addPartitionDesc.setTableName(tblDesc.getTableName());
    }
  }
  Warehouse wh = new Warehouse(x.getConf());
  Table table = tableIfExists(tblDesc, x.getHive());
  boolean tableExists = false;
  if (table != null) {
    checkTable(table, tblDesc, replicationSpec, x.getConf());
    x.getLOG().debug("table " + tblDesc.getTableName() + " exists: metadata checked");
    tableExists = true;
  }
  // Initialize with 0 for non-ACID and non-MM tables.
  Long writeId = 0L;
  if (((table != null) && AcidUtils.isTransactionalTable(table))
      || AcidUtils.isTablePropertyTransactional(tblDesc.getTblProps())) {
    // Explain plan doesn't open a txn and hence no need to allocate write id.
    if (x.getCtx().getExplainConfig() == null) {
      writeId = SessionState.get().getTxnMgr().getTableWriteId(tblDesc.getDatabaseName(), tblDesc.getTableName());
    }
  }
  int stmtId = 0;
  /*
  if (isAcid(writeId)) {
    tblDesc.setInitialMmWriteId(writeId);
  }
  */
  if (!replicationSpec.isInReplicationScope()) {
    createRegularImportTasks(tblDesc, partitionDescs, isPartSpecSet, replicationSpec, table,
        fromURI, fs, wh, x, writeId, stmtId, isSourceMm);
  } else {
    createReplImportTasks(tblDesc, partitionDescs, replicationSpec, waitOnPrecursor, table,
        fromURI, fs, wh, x, writeId, stmtId, isSourceMm, updatedMetadata);
  }
  return tableExists;
}
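The MetaData handling at the top of prepareImport follows a small, reusable pattern: read the _metadata file from the dump directory, return early if it carries no table object or a no-op replication spec, and let a caller-supplied name override the database recorded in the dump. Below is a minimal sketch of that pattern, assuming the usual package locations for EximUtil and ReplicationSpec; DumpMetaDataSketch, effectiveDbName, dumpRoot and overrideDbName are illustrative names, not Hive API.

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.parse.EximUtil;
import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
import org.apache.hadoop.hive.ql.parse.repl.load.MetaData;

public class DumpMetaDataSketch {
  // Returns the effective database name for the dump under dumpRoot, or null if there is nothing to import.
  // dumpRoot and overrideDbName are illustrative parameters supplied by the caller.
  public static String effectiveDbName(HiveConf conf, Path dumpRoot, String overrideDbName) throws Exception {
    FileSystem fs = dumpRoot.getFileSystem(conf);
    // _metadata sits directly under the dump directory; EximUtil.METADATA_NAME is its file name.
    MetaData rv = EximUtil.readMetaData(fs, new Path(dumpRoot, EximUtil.METADATA_NAME));
    if (rv.getTable() == null) {
      return null; // nothing to do, mirroring prepareImport's silent return
    }
    ReplicationSpec spec = rv.getReplicationSpec();
    if (spec.isNoop()) {
      return null; // no-op update, skip it
    }
    // Prefer the name supplied on the command line, otherwise the one recorded in the dump.
    return (overrideDbName != null && !overrideDbName.isEmpty())
        ? overrideDbName : rv.getTable().getDbName();
  }
}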
Use of org.apache.hadoop.hive.ql.parse.repl.load.MetaData in project hive by apache: class FSDatabaseEvent, method dbInMetadata.
@Override
public Database dbInMetadata(String dbNameToOverride) throws SemanticException {
  try {
    MetaData rv = EximUtil.readMetaData(fileSystem, dbMetadataFile);
    Database dbObj = rv.getDatabase();
    if (dbObj == null) {
      throw new IllegalArgumentException("_metadata file read did not contain a db object - invalid dump.");
    }
    // override the db name if provided in repl load command
    if (StringUtils.isNotBlank(dbNameToOverride)) {
      dbObj.setName(dbNameToOverride);
    }
    return dbObj;
  } catch (Exception e) {
    throw new SemanticException(e);
  }
}
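Database-level dumps carry a Database object in the same _metadata file, which is what dbInMetadata pulls out before optionally renaming it. A short sketch of that usage, assuming the thrift Database type from org.apache.hadoop.hive.metastore.api; readDatabase and its parameters are illustrative, not part of the Hive API.

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.ql.parse.EximUtil;
import org.apache.hadoop.hive.ql.parse.repl.load.MetaData;

public class DbMetaDataSketch {
  // Reads the Database object from a database-level _metadata file, optionally renaming it.
  public static Database readDatabase(FileSystem fs, Path dbMetadataFile, String newName) throws Exception {
    MetaData rv = EximUtil.readMetaData(fs, dbMetadataFile);
    Database dbObj = rv.getDatabase();   // null for table-level dumps
    if (dbObj == null) {
      throw new IllegalArgumentException("dump does not contain a database object");
    }
    if (newName != null && !newName.trim().isEmpty()) {
      dbObj.setName(newName);            // mirrors the "repl load dbName" override
    }
    return dbObj;
  }
}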
Use of org.apache.hadoop.hive.ql.parse.repl.load.MetaData in project hive by apache: class InsertHandler, method handle.
@Override
public List<Task<?>> handle(Context withinContext) throws SemanticException {
  try {
    FileSystem fs = FileSystem.get(new Path(withinContext.location).toUri(), withinContext.hiveConf);
    MetaData metaData = EximUtil.readMetaData(fs, new Path(withinContext.location, EximUtil.METADATA_NAME));
    ReplicationSpec replicationSpec = metaData.getReplicationSpec();
    if (replicationSpec.isNoop()) {
      return Collections.emptyList();
    }
  } catch (Exception e) {
    LOG.error("failed to load insert event", e);
    throw new SemanticException(e);
  }
  InsertMessage insertMessage = deserializer.getInsertMessage(withinContext.dmd.getPayload());
  String actualDbName = withinContext.isDbNameEmpty() ? insertMessage.getDB() : withinContext.dbName;
  Context currentContext = new Context(withinContext, actualDbName,
      withinContext.getDumpDirectory(), withinContext.getMetricCollector());
  // Piggybacking in Import logic for now
  TableHandler tableHandler = new TableHandler();
  List<Task<?>> tasks = tableHandler.handle(currentContext);
  readEntitySet.addAll(tableHandler.readEntities());
  writeEntitySet.addAll(tableHandler.writeEntities());
  getUpdatedMetadata().copyUpdatedMetadata(tableHandler.getUpdatedMetadata());
  return tasks;
}
Use of org.apache.hadoop.hive.ql.parse.repl.load.MetaData in project hive by apache: class TableHandler, method extract.
private Tuple extract(Context context) throws SemanticException {
  try {
    String tableType = null;
    long writeId = DEFAULT_WRITE_ID;
    switch (context.dmd.getDumpType()) {
      case EVENT_CREATE_TABLE:
      case EVENT_ADD_PARTITION:
        Path metadataPath = new Path(context.location, EximUtil.METADATA_NAME);
        MetaData rv = EximUtil.readMetaData(metadataPath.getFileSystem(context.hiveConf), metadataPath);
        tableType = rv.getTable().getTableType();
        break;
      case EVENT_ALTER_TABLE:
        AlterTableMessage alterTableMessage = deserializer.getAlterTableMessage(context.dmd.getPayload());
        tableType = alterTableMessage.getTableObjAfter().getTableType();
        writeId = alterTableMessage.getWriteId();
        break;
      case EVENT_ALTER_PARTITION:
        AlterPartitionMessage msg = deserializer.getAlterPartitionMessage(context.dmd.getPayload());
        tableType = msg.getTableObj().getTableType();
        writeId = msg.getWriteId();
        break;
      default:
        break;
    }
    boolean isExternalTable = tableType != null
        && TableType.EXTERNAL_TABLE.equals(Enum.valueOf(TableType.class, tableType));
    return new Tuple(isExternalTable, writeId);
  } catch (Exception e) {
    LOG.error("failed to determine if the table associated with the event is external or not", e);
    throw new SemanticException(e);
  }
}
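For EVENT_CREATE_TABLE and EVENT_ADD_PARTITION the table type is taken straight from the _metadata table object, as the string name of a metastore TableType value. A tiny sketch of that check, assuming TableType lives at org.apache.hadoop.hive.metastore.TableType; isExternal is an illustrative helper.

import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.ql.parse.repl.load.MetaData;

public class TableTypeSketch {
  // Decides whether the dumped table is external, given the MetaData already read from _metadata.
  public static boolean isExternal(MetaData rv) {
    String tableType = (rv.getTable() == null) ? null : rv.getTable().getTableType();
    // getTableType() returns the enum name as a string, e.g. "EXTERNAL_TABLE" or "MANAGED_TABLE".
    return tableType != null && TableType.EXTERNAL_TABLE.name().equals(tableType);
  }
}

Comparing against TableType.EXTERNAL_TABLE.name(), as TableHandler.handle does for VIRTUAL_VIEW, avoids the IllegalArgumentException that Enum.valueOf would throw on an unexpected type string.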
Use of org.apache.hadoop.hive.ql.parse.repl.load.MetaData in project hive by apache: class TableHandler, method handle.
@Override
public List<Task<?>> handle(Context context) throws SemanticException {
  try {
    List<Task<?>> importTasks = new ArrayList<>();
    boolean isExternal = false, isLocationSet = false;
    String parsedLocation = null;
    DumpType eventType = context.dmd.getDumpType();
    Tuple tuple = extract(context);
    MetaData rv = EximUtil.getMetaDataFromLocation(context.location, context.hiveConf);
    if (tuple.isExternalTable) {
      isLocationSet = true;
      isExternal = true;
      Table table = new Table(rv.getTable());
      parsedLocation = ReplExternalTables.externalTableLocation(context.hiveConf, table.getSd().getLocation());
    }
    context.nestedContext.setConf(context.hiveConf);
    EximUtil.SemanticAnalyzerWrapperContext x = new EximUtil.SemanticAnalyzerWrapperContext(context.hiveConf,
        context.db, readEntitySet, writeEntitySet, importTasks, context.log, context.nestedContext);
    x.setEventType(eventType);
    // REPL LOAD is not partition level. It is always DB or table level. So, passing null for partition specs.
    if (TableType.VIRTUAL_VIEW.name().equals(rv.getTable().getTableType())) {
      importTasks.add(ReplLoadTask.createViewTask(rv, context.dbName, context.hiveConf,
          context.getDumpDirectory(), context.getMetricCollector()));
    } else {
      ImportSemanticAnalyzer.prepareImport(false, isLocationSet, isExternal, false, (context.precursor != null),
          parsedLocation, null, context.dbName, null, context.location, x, updatedMetadata, context.getTxnMgr(),
          tuple.writeId, rv, context.getDumpDirectory(), context.getMetricCollector());
    }
    Task<?> openTxnTask = x.getOpenTxnTask();
    if (openTxnTask != null && !importTasks.isEmpty()) {
      for (Task<?> t : importTasks) {
        openTxnTask.addDependentTask(t);
      }
      importTasks.add(openTxnTask);
    }
    return importTasks;
  } catch (RuntimeException e) {
    throw e;
  } catch (Exception e) {
    throw new SemanticException(e);
  }
}
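When the extracted tuple marks the table as external, handle reads the dump's MetaData via EximUtil.getMetaDataFromLocation and remaps the source data location with ReplExternalTables.externalTableLocation before delegating to prepareImport. A hedged sketch of just that step, assuming ReplExternalTables sits in org.apache.hadoop.hive.ql.exec.repl and that the ql-level Table wraps the thrift table object from the dump; targetLocation is an illustrative helper, not Hive API.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.repl.ReplExternalTables;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.EximUtil;
import org.apache.hadoop.hive.ql.parse.repl.load.MetaData;

public class ExternalLocationSketch {
  // Given an event dump location, work out where an external table's data should land on the target cluster.
  public static String targetLocation(String dumpLocation, HiveConf conf) throws Exception {
    MetaData rv = EximUtil.getMetaDataFromLocation(dumpLocation, conf);
    Table table = new Table(rv.getTable());  // wrap the thrift table object in the ql-level Table
    // Remap the source location under the external-table base directory configured for replication.
    return ReplExternalTables.externalTableLocation(conf, table.getSd().getLocation());
  }
}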