
Example 1 with ImportTableDesc

use of org.apache.hadoop.hive.ql.plan.ImportTableDesc in project hive by apache.

the class ImportSemanticAnalyzer method prepareImport.

/**
 * The same code is used by both "repl load" and "import".
 * "repl load" now supports two modes, "repl load dbName [location]" and
 * "repl load [location]"; in the latter case the database name has to be taken from the table
 * metadata by default and then overridden if one is specified on the command line.
 *
 * Hence, for import to work correctly, we have to pass in the sessionState default DB via the
 * overrideDBName parameter.
 */
public static boolean prepareImport(boolean isImportCmd, boolean isLocationSet, boolean isExternalSet, boolean isPartSpecSet, boolean waitOnPrecursor, String parsedLocation, String parsedTableName, String overrideDBName, LinkedHashMap<String, String> parsedPartSpec, String fromLocn, EximUtil.SemanticAnalyzerWrapperContext x, UpdatedMetaDataTracker updatedMetadata) throws IOException, MetaException, HiveException, URISyntaxException {
    // initialize load path
    URI fromURI = EximUtil.getValidatedURI(x.getConf(), stripQuotes(fromLocn));
    Path fromPath = new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI.getPath());
    FileSystem fs = FileSystem.get(fromURI, x.getConf());
    x.getInputs().add(toReadEntity(fromPath, x.getConf()));
    MetaData rv;
    try {
        rv = EximUtil.readMetaData(fs, new Path(fromPath, EximUtil.METADATA_NAME));
    } catch (IOException e) {
        throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
    }
    if (rv.getTable() == null) {
        // nothing to do here, silently return.
        return false;
    }
    ReplicationSpec replicationSpec = rv.getReplicationSpec();
    if (replicationSpec.isNoop()) {
        // nothing to do here, silently return.
        x.getLOG().debug("Current update with ID:{} is noop", replicationSpec.getCurrentReplicationState());
        return false;
    }
    if (isImportCmd) {
        replicationSpec.setReplSpecType(ReplicationSpec.Type.IMPORT);
    }
    String dbname = rv.getTable().getDbName();
    if ((overrideDBName != null) && (!overrideDBName.isEmpty())) {
        // If the parsed statement contained a db.tablename specification, prefer that.
        dbname = overrideDBName;
    }
    // Create table associated with the import
    // Executed if relevant, and used to contain all the other details about the table if not.
    ImportTableDesc tblDesc;
    try {
        tblDesc = getBaseCreateTableDescFromTable(dbname, rv.getTable());
    } catch (Exception e) {
        throw new HiveException(e);
    }
    boolean isSourceMm = AcidUtils.isInsertOnlyTable(tblDesc.getTblProps());
    if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
        tblDesc.setReplicationSpec(replicationSpec);
        StatsSetupConst.setBasicStatsState(tblDesc.getTblProps(), StatsSetupConst.FALSE);
    }
    if (isExternalSet) {
        if (isSourceMm) {
            throw new SemanticException("Cannot import an MM table as external");
        }
        tblDesc.setExternal(isExternalSet);
    // This condition-check could have been avoided, but to honour the old
    // default of not calling if it wasn't set, we retain that behaviour.
    // TODO:cleanup after verification that the outer if isn't really needed here
    }
    if (isLocationSet) {
        tblDesc.setLocation(parsedLocation);
        x.getInputs().add(toReadEntity(new Path(parsedLocation), x.getConf()));
    }
    if ((parsedTableName != null) && (!parsedTableName.isEmpty())) {
        tblDesc.setTableName(parsedTableName);
    }
    List<AddPartitionDesc> partitionDescs = new ArrayList<AddPartitionDesc>();
    Iterable<Partition> partitions = rv.getPartitions();
    for (Partition partition : partitions) {
        // TODO: this should ideally not create AddPartitionDesc per partition
        AddPartitionDesc partsDesc = getBaseAddPartitionDescFromPartition(fromPath, dbname, tblDesc, partition);
        if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
            StatsSetupConst.setBasicStatsState(partsDesc.getPartition(0).getPartParams(), StatsSetupConst.FALSE);
        }
        partitionDescs.add(partsDesc);
    }
    if (isPartSpecSet) {
        // The import specification asked for only a particular partition to be loaded
        // We load only that, and ignore all the others.
        boolean found = false;
        for (Iterator<AddPartitionDesc> partnIter = partitionDescs.listIterator(); partnIter.hasNext(); ) {
            AddPartitionDesc addPartitionDesc = partnIter.next();
            if (!found && addPartitionDesc.getPartition(0).getPartSpec().equals(parsedPartSpec)) {
                found = true;
            } else {
                partnIter.remove();
            }
        }
        if (!found) {
            throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(" - Specified partition not found in import directory"));
        }
    }
    if (tblDesc.getTableName() == null) {
        // Either we got the tablename from the IMPORT statement (first priority)
        // or from the export dump.
        throw new SemanticException(ErrorMsg.NEED_TABLE_SPECIFICATION.getMsg());
    } else {
        x.getConf().set("import.destination.table", tblDesc.getTableName());
        for (AddPartitionDesc addPartitionDesc : partitionDescs) {
            addPartitionDesc.setTableName(tblDesc.getTableName());
        }
    }
    Warehouse wh = new Warehouse(x.getConf());
    Table table = tableIfExists(tblDesc, x.getHive());
    boolean tableExists = false;
    if (table != null) {
        checkTable(table, tblDesc, replicationSpec, x.getConf());
        x.getLOG().debug("table " + tblDesc.getTableName() + " exists: metadata checked");
        tableExists = true;
    }
    // Initialize with 0 for non-ACID and non-MM tables.
    Long writeId = 0L;
    if (((table != null) && AcidUtils.isTransactionalTable(table)) || AcidUtils.isTablePropertyTransactional(tblDesc.getTblProps())) {
        // Explain plan doesn't open a txn and hence no need to allocate write id.
        if (x.getCtx().getExplainConfig() == null) {
            writeId = SessionState.get().getTxnMgr().getTableWriteId(tblDesc.getDatabaseName(), tblDesc.getTableName());
        }
    }
    int stmtId = 0;
    /*
    if (isAcid(writeId)) {
      tblDesc.setInitialMmWriteId(writeId);
    }
    */
    if (!replicationSpec.isInReplicationScope()) {
        createRegularImportTasks(tblDesc, partitionDescs, isPartSpecSet, replicationSpec, table, fromURI, fs, wh, x, writeId, stmtId, isSourceMm);
    } else {
        createReplImportTasks(tblDesc, partitionDescs, replicationSpec, waitOnPrecursor, table, fromURI, fs, wh, x, writeId, stmtId, isSourceMm, updatedMetadata);
    }
    return tableExists;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.metastore.api.Partition) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Table(org.apache.hadoop.hive.ql.metadata.Table) ImportTableDesc(org.apache.hadoop.hive.ql.plan.ImportTableDesc) ArrayList(java.util.ArrayList) IOException(java.io.IOException) URI(java.net.URI) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) MetaData(org.apache.hadoop.hive.ql.parse.repl.load.MetaData) FileSystem(org.apache.hadoop.fs.FileSystem) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc)
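
The javadoc above is easier to see from the caller's side. Below is a minimal, hypothetical invocation for a plain IMPORT command; the wrapper context x, the table name, the dump path, and the null metadata tracker are illustrative assumptions, not code from the Hive source.

// Hypothetical caller: a plain "import", so the session's current database is
// passed as overrideDBName rather than being read from the dump metadata.
String defaultDb = SessionState.get().getCurrentDatabase();
boolean tableExisted = ImportSemanticAnalyzer.prepareImport(
    /* isImportCmd */ true,              // tags the ReplicationSpec as Type.IMPORT
    /* isLocationSet */ false,
    /* isExternalSet */ false,
    /* isPartSpecSet */ false,
    /* waitOnPrecursor */ false,         // only meaningful under REPL LOAD
    /* parsedLocation */ null,
    /* parsedTableName */ "target_table",
    /* overrideDBName */ defaultDb,      // the sessionState default DB from the javadoc
    /* parsedPartSpec */ null,
    /* fromLocn */ "/user/hive/export/target_table",
    x,                                   // EximUtil.SemanticAnalyzerWrapperContext built by the caller
    /* updatedMetadata */ null);         // replication-only tracker; assumed null for a plain import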

Example 2 with ImportTableDesc

use of org.apache.hadoop.hive.ql.plan.ImportTableDesc in project hive by apache.

the class ImportSemanticAnalyzer method getBaseCreateTableDescFromTable.

private static ImportTableDesc getBaseCreateTableDescFromTable(String dbName, org.apache.hadoop.hive.metastore.api.Table tblObj) throws Exception {
    Table table = new Table(tblObj);
    ImportTableDesc tblDesc = new ImportTableDesc(dbName, table);
    return tblDesc;
}
Also used : Table(org.apache.hadoop.hive.ql.metadata.Table) ImportTableDesc(org.apache.hadoop.hive.ql.plan.ImportTableDesc)

Example 3 with ImportTableDesc

use of org.apache.hadoop.hive.ql.plan.ImportTableDesc in project hive by apache.

the class ImportSemanticAnalyzer method prepareImport.

public static boolean prepareImport(boolean isLocationSet, boolean isExternalSet, boolean isPartSpecSet, boolean waitOnPrecursor, String parsedLocation, String parsedTableName, String parsedDbName, LinkedHashMap<String, String> parsedPartSpec, String fromLocn, EximUtil.SemanticAnalyzerWrapperContext x, Map<String, Long> dbsUpdated, Map<String, Long> tablesUpdated) throws IOException, MetaException, HiveException, URISyntaxException {
    // initialize load path
    URI fromURI = EximUtil.getValidatedURI(x.getConf(), stripQuotes(fromLocn));
    Path fromPath = new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI.getPath());
    FileSystem fs = FileSystem.get(fromURI, x.getConf());
    x.getInputs().add(toReadEntity(fromPath, x.getConf()));
    EximUtil.ReadMetaData rv = new EximUtil.ReadMetaData();
    try {
        rv = EximUtil.readMetaData(fs, new Path(fromPath, EximUtil.METADATA_NAME));
    } catch (IOException e) {
        throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
    }
    ReplicationSpec replicationSpec = rv.getReplicationSpec();
    if (replicationSpec.isNoop()) {
        // nothing to do here, silently return.
        return false;
    }
    String dbname = SessionState.get().getCurrentDatabase();
    if ((parsedDbName != null) && (!parsedDbName.isEmpty())) {
        // If the parsed statement contained a db.tablename specification, prefer that.
        dbname = parsedDbName;
    }
    if (dbsUpdated != null) {
        dbsUpdated.put(dbname, Long.valueOf(replicationSpec.get(ReplicationSpec.KEY.EVENT_ID)));
    }
    // Create table associated with the import
    // Executed if relevant, and used to contain all the other details about the table if not.
    ImportTableDesc tblDesc;
    try {
        tblDesc = getBaseCreateTableDescFromTable(dbname, rv.getTable());
    } catch (Exception e) {
        throw new HiveException(e);
    }
    if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
        tblDesc.setReplicationSpec(replicationSpec);
    }
    if (isExternalSet) {
        tblDesc.setExternal(isExternalSet);
    // This condition-check could have been avoided, but to honour the old
    // default of not calling if it wasn't set, we retain that behaviour.
    // TODO:cleanup after verification that the outer if isn't really needed here
    }
    if (isLocationSet) {
        tblDesc.setLocation(parsedLocation);
        x.getInputs().add(toReadEntity(new Path(parsedLocation), x.getConf()));
    }
    if ((parsedTableName != null) && (!parsedTableName.isEmpty())) {
        tblDesc.setTableName(parsedTableName);
    }
    if (tablesUpdated != null) {
        tablesUpdated.put(dbname + "." + tblDesc.getTableName(), Long.valueOf(replicationSpec.get(ReplicationSpec.KEY.EVENT_ID)));
    }
    List<AddPartitionDesc> partitionDescs = new ArrayList<AddPartitionDesc>();
    Iterable<Partition> partitions = rv.getPartitions();
    for (Partition partition : partitions) {
        // TODO: this should ideally not create AddPartitionDesc per partition
        AddPartitionDesc partsDesc = getBaseAddPartitionDescFromPartition(fromPath, dbname, tblDesc, partition);
        partitionDescs.add(partsDesc);
    }
    if (isPartSpecSet) {
        // The import specification asked for only a particular partition to be loaded
        // We load only that, and ignore all the others.
        boolean found = false;
        for (Iterator<AddPartitionDesc> partnIter = partitionDescs.listIterator(); partnIter.hasNext(); ) {
            AddPartitionDesc addPartitionDesc = partnIter.next();
            if (!found && addPartitionDesc.getPartition(0).getPartSpec().equals(parsedPartSpec)) {
                found = true;
            } else {
                partnIter.remove();
            }
        }
        if (!found) {
            throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(" - Specified partition not found in import directory"));
        }
    }
    if (tblDesc.getTableName() == null) {
        // Either we got the tablename from the IMPORT statement (first priority)
        // or from the export dump.
        throw new SemanticException(ErrorMsg.NEED_TABLE_SPECIFICATION.getMsg());
    } else {
        x.getConf().set("import.destination.table", tblDesc.getTableName());
        for (AddPartitionDesc addPartitionDesc : partitionDescs) {
            addPartitionDesc.setTableName(tblDesc.getTableName());
        }
    }
    Warehouse wh = new Warehouse(x.getConf());
    Table table = tableIfExists(tblDesc, x.getHive());
    boolean tableExists = false;
    if (table != null) {
        checkTable(table, tblDesc, replicationSpec, x.getConf());
        x.getLOG().debug("table " + tblDesc.getTableName() + " exists: metadata checked");
        tableExists = true;
    }
    if (!replicationSpec.isInReplicationScope()) {
        createRegularImportTasks(tblDesc, partitionDescs, isPartSpecSet, replicationSpec, table, fromURI, fs, wh, x);
    } else {
        createReplImportTasks(tblDesc, partitionDescs, isPartSpecSet, replicationSpec, waitOnPrecursor, table, fromURI, fs, wh, x);
    }
    return tableExists;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.metastore.api.Partition) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Table(org.apache.hadoop.hive.ql.metadata.Table) ImportTableDesc(org.apache.hadoop.hive.ql.plan.ImportTableDesc) ArrayList(java.util.ArrayList) IOException(java.io.IOException) URI(java.net.URI) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) FileSystem(org.apache.hadoop.fs.FileSystem) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc)
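
This older overload reports progress through the two caller-supplied maps instead of an UpdatedMetaDataTracker. Here is a minimal sketch of how a repl-load driver might supply and then read them; dumpDirLocation, x, and LOG are assumed to exist in the surrounding analyzer and are not part of the example above.

// Hypothetical caller-side bookkeeping for the older overload.
Map<String, Long> dbsUpdated = new HashMap<>();
Map<String, Long> tablesUpdated = new HashMap<>();
ImportSemanticAnalyzer.prepareImport(
    false, false, false, /* waitOnPrecursor */ true,
    null, null, /* parsedDbName */ "repl_target_db", null,
    dumpDirLocation, x, dbsUpdated, tablesUpdated);
// Each entry now maps a "db" or "db.table" key to the replication event id
// (ReplicationSpec.KEY.EVENT_ID) of the update that touched it.
for (Map.Entry<String, Long> entry : tablesUpdated.entrySet()) {
    LOG.debug("table {} last updated by event {}", entry.getKey(), entry.getValue());
}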

Example 4 with ImportTableDesc

use of org.apache.hadoop.hive.ql.plan.ImportTableDesc in project hive by apache.

the class LoadTable method tasks.

public TaskTracker tasks() throws SemanticException {
    // Path being passed to us is a table dump location. We go ahead and load it in as needed.
    // If tblName is null, then we default to the table name specified in _metadata, which is good,
    // or are both specified, in which case, that's what we are intended to create the new table as.
    try {
        if (event.shouldNotReplicate()) {
            return tracker;
        }
        // this can never be null or empty;
        String dbName = tableContext.dbNameToLoadIn;
        // Create table associated with the import
        // Executed if relevant, and used to contain all the other details about the table if not.
        ImportTableDesc tableDesc = tableContext.overrideProperties(event.tableDesc(dbName));
        Table table = ImportSemanticAnalyzer.tableIfExists(tableDesc, context.hiveDb);
        ReplicationSpec replicationSpec = event.replicationSpec();
        // Normally, on import, trying to create a table or a partition in a db that does not yet exist
        // is an error condition. However, in the case of a REPL LOAD, it is possible that we are trying
        // to create tasks to create a table inside a db that as-of-now does not exist, but there is
        // a precursor Task waiting that will create it before this is encountered. Thus, we instantiate
        // defaults and do not error out in that case.
        // The above will change now, since we are going to split the replication load into multiple execution
        // tasks; we could have created the database earlier, in which case waitOnPrecursor will
        // be false, and hence if the db is not found we should error out.
        Database parentDb = context.hiveDb.getDatabase(tableDesc.getDatabaseName());
        if (parentDb == null) {
            if (!tableContext.waitOnPrecursor()) {
                throw new SemanticException(ErrorMsg.DATABASE_NOT_EXISTS.getMsg(tableDesc.getDatabaseName()));
            }
        }
        if (table == null) {
            // If table doesn't exist, allow creating a new one only if the database state is older than the update.
            if ((parentDb != null) && (!replicationSpec.allowReplacementInto(parentDb.getParameters()))) {
                // If the target table exists and is newer or same as current update based on repl.last.id, then just noop it.
                return tracker;
            }
        } else {
            if (!replicationSpec.allowReplacementInto(table.getParameters())) {
                // If the target table exists and is newer or same as current update based on repl.last.id, then just noop it.
                return tracker;
            }
        }
        if (tableDesc.getLocation() == null) {
            tableDesc.setLocation(location(tableDesc, parentDb));
        }
        /* Note: In the following section, Metadata-only import handling logic is
           interleaved with regular repl-import logic. The rule of thumb being
           followed here is that MD-only imports are essentially ALTERs. They do
           not load data, and should not be "creating" any metadata - they should
           be replacing instead. The only place it makes sense for a MD-only import
           to create is in the case of a table that's been dropped and recreated,
           or in the case of an unpartitioned table. In all other cases, it should
           behave like a noop or a pure MD alter.
        */
        if (table == null) {
            newTableTasks(tableDesc);
        } else {
            existingTableTasks(tableDesc, table, replicationSpec);
        }
        if (!isPartitioned(tableDesc)) {
            createTableReplLogTask(tableDesc.getTableName(), tableDesc.tableType());
        }
        return tracker;
    } catch (Exception e) {
        throw new SemanticException(e);
    }
}
Also used : ReplicationSpec(org.apache.hadoop.hive.ql.parse.ReplicationSpec) Table(org.apache.hadoop.hive.ql.metadata.Table) ImportTableDesc(org.apache.hadoop.hive.ql.plan.ImportTableDesc) Database(org.apache.hadoop.hive.metastore.api.Database) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) IOException(java.io.IOException)
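
Both noop branches above hinge on ReplicationSpec.allowReplacementInto(...). The sketch below illustrates the comparison those comments describe, assuming the replication state is a numeric event id stored under a repl.last.id-style parameter; it is a simplified illustration of the semantics, not the actual Hive implementation.

// Illustrative only: replacement is allowed when the incoming update is strictly
// newer than the replication state already recorded on the target object.
static boolean allowReplacementInto(Map<String, String> targetParams, String incomingReplState) {
    String lastReplId = (targetParams == null) ? null : targetParams.get("repl.last.id");
    if (lastReplId == null || incomingReplState == null) {
        // No recorded replication state to protect: allow the write.
        return true;
    }
    // Target is already at the same event or a later one: the caller noops the load.
    return Long.parseLong(incomingReplState) > Long.parseLong(lastReplId);
}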

Example 5 with ImportTableDesc

use of org.apache.hadoop.hive.ql.plan.ImportTableDesc in project hive by apache.

the class FSTableEvent method tableDesc.

@Override
public ImportTableDesc tableDesc(String dbName) throws SemanticException {
    try {
        Table table = new Table(metadata.getTable());
        ImportTableDesc tableDesc = new ImportTableDesc(StringUtils.isBlank(dbName) ? table.getDbName() : dbName, table);
        tableDesc.setReplicationSpec(metadata.getReplicationSpec());
        return tableDesc;
    } catch (Exception e) {
        throw new SemanticException(e);
    }
}
Also used : Table(org.apache.hadoop.hive.ql.metadata.Table) ImportTableDesc(org.apache.hadoop.hive.ql.plan.ImportTableDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
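
A short usage note: because of the StringUtils.isBlank check, a null or blank dbName keeps the database name recorded in the dump, while a non-blank name redirects the load. A minimal sketch, assuming event is an FSTableEvent and "replica_db" is a hypothetical target database.

ImportTableDesc asDumped = event.tableDesc(null);           // db name taken from the dump metadata
ImportTableDesc redirected = event.tableDesc("replica_db"); // load into replica_db instead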

Aggregations

Table (org.apache.hadoop.hive.ql.metadata.Table): 5
ImportTableDesc (org.apache.hadoop.hive.ql.plan.ImportTableDesc): 5
IOException (java.io.IOException): 3
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 3
URI (java.net.URI): 2
URISyntaxException (java.net.URISyntaxException): 2
ArrayList (java.util.ArrayList): 2
FileSystem (org.apache.hadoop.fs.FileSystem): 2
Path (org.apache.hadoop.fs.Path): 2
Warehouse (org.apache.hadoop.hive.metastore.Warehouse): 2
Partition (org.apache.hadoop.hive.metastore.api.Partition): 2
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException): 2
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 2
AddPartitionDesc (org.apache.hadoop.hive.ql.plan.AddPartitionDesc): 2
Database (org.apache.hadoop.hive.metastore.api.Database): 1
ReplicationSpec (org.apache.hadoop.hive.ql.parse.ReplicationSpec): 1
MetaData (org.apache.hadoop.hive.ql.parse.repl.load.MetaData): 1