Use of org.apache.hadoop.hive.ql.parse.repl.load.MetaData in project hive by apache: class ImportSemanticAnalyzer, method prepareImport.
/**
 * The same code is used by both "repl load" and "import".
 * "repl load" now supports two modes, "repl load dbName [location]" and
 * "repl load [location]"; in the latter case the database name is taken from the table
 * metadata by default and then overridden if one is specified on the command line.
 *
 * Hence, for import to work correctly, we have to pass in the sessionState default Db via the
 * overrideDBName parameter.
 */
public static boolean prepareImport(boolean isImportCmd, boolean isLocationSet, boolean isExternalSet,
    boolean isPartSpecSet, boolean waitOnPrecursor, String parsedLocation, String parsedTableName,
    String overrideDBName, LinkedHashMap<String, String> parsedPartSpec, String fromLocn,
    EximUtil.SemanticAnalyzerWrapperContext x, UpdatedMetaDataTracker updatedMetadata)
    throws IOException, MetaException, HiveException, URISyntaxException {
  // initialize load path
  URI fromURI = EximUtil.getValidatedURI(x.getConf(), stripQuotes(fromLocn));
  Path fromPath = new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI.getPath());
  FileSystem fs = FileSystem.get(fromURI, x.getConf());
  x.getInputs().add(toReadEntity(fromPath, x.getConf()));
  MetaData rv;
  try {
    rv = EximUtil.readMetaData(fs, new Path(fromPath, EximUtil.METADATA_NAME));
  } catch (IOException e) {
    throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
  }
  if (rv.getTable() == null) {
    // nothing to do here, silently return.
    return false;
  }
  ReplicationSpec replicationSpec = rv.getReplicationSpec();
  if (replicationSpec.isNoop()) {
    // nothing to do here, silently return.
    x.getLOG().debug("Current update with ID:{} is noop", replicationSpec.getCurrentReplicationState());
    return false;
  }
  if (isImportCmd) {
    replicationSpec.setReplSpecType(ReplicationSpec.Type.IMPORT);
  }
  String dbname = rv.getTable().getDbName();
  if ((overrideDBName != null) && (!overrideDBName.isEmpty())) {
    // If the parsed statement contained a db.tablename specification, prefer that.
    dbname = overrideDBName;
  }
  // Create table associated with the import
  // Executed if relevant, and used to contain all the other details about the table if not.
  ImportTableDesc tblDesc;
  try {
    tblDesc = getBaseCreateTableDescFromTable(dbname, rv.getTable());
  } catch (Exception e) {
    throw new HiveException(e);
  }
  boolean isSourceMm = AcidUtils.isInsertOnlyTable(tblDesc.getTblProps());
  if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
    tblDesc.setReplicationSpec(replicationSpec);
    StatsSetupConst.setBasicStatsState(tblDesc.getTblProps(), StatsSetupConst.FALSE);
  }
  if (isExternalSet) {
    if (isSourceMm) {
      throw new SemanticException("Cannot import an MM table as external");
    }
    tblDesc.setExternal(isExternalSet);
    // This condition-check could have been avoided, but to honour the old
    // default of not calling if it wasn't set, we retain that behaviour.
    // TODO:cleanup after verification that the outer if isn't really needed here
  }
  if (isLocationSet) {
    tblDesc.setLocation(parsedLocation);
    x.getInputs().add(toReadEntity(new Path(parsedLocation), x.getConf()));
  }
  if ((parsedTableName != null) && (!parsedTableName.isEmpty())) {
    tblDesc.setTableName(parsedTableName);
  }
  List<AddPartitionDesc> partitionDescs = new ArrayList<AddPartitionDesc>();
  Iterable<Partition> partitions = rv.getPartitions();
  for (Partition partition : partitions) {
    // TODO: this should ideally not create AddPartitionDesc per partition
    AddPartitionDesc partsDesc = getBaseAddPartitionDescFromPartition(fromPath, dbname, tblDesc, partition);
    if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
      StatsSetupConst.setBasicStatsState(partsDesc.getPartition(0).getPartParams(), StatsSetupConst.FALSE);
    }
    partitionDescs.add(partsDesc);
  }
  if (isPartSpecSet) {
    // The import specification asked for only a particular partition to be loaded
    // We load only that, and ignore all the others.
    boolean found = false;
    for (Iterator<AddPartitionDesc> partnIter = partitionDescs.listIterator(); partnIter.hasNext(); ) {
      AddPartitionDesc addPartitionDesc = partnIter.next();
      if (!found && addPartitionDesc.getPartition(0).getPartSpec().equals(parsedPartSpec)) {
        found = true;
      } else {
        partnIter.remove();
      }
    }
    if (!found) {
      throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(" - Specified partition not found in import directory"));
    }
  }
  if (tblDesc.getTableName() == null) {
    // Either we got the tablename from the IMPORT statement (first priority)
    // or from the export dump.
    throw new SemanticException(ErrorMsg.NEED_TABLE_SPECIFICATION.getMsg());
  } else {
    x.getConf().set("import.destination.table", tblDesc.getTableName());
    for (AddPartitionDesc addPartitionDesc : partitionDescs) {
      addPartitionDesc.setTableName(tblDesc.getTableName());
    }
  }
  Warehouse wh = new Warehouse(x.getConf());
  Table table = tableIfExists(tblDesc, x.getHive());
  boolean tableExists = false;
  if (table != null) {
    checkTable(table, tblDesc, replicationSpec, x.getConf());
    x.getLOG().debug("table " + tblDesc.getTableName() + " exists: metadata checked");
    tableExists = true;
  }
  // Initialize with 0 for non-ACID and non-MM tables.
  Long writeId = 0L;
  if (((table != null) && AcidUtils.isTransactionalTable(table))
      || AcidUtils.isTablePropertyTransactional(tblDesc.getTblProps())) {
    // Explain plan doesn't open a txn and hence no need to allocate write id.
    if (x.getCtx().getExplainConfig() == null) {
      writeId = SessionState.get().getTxnMgr().getTableWriteId(tblDesc.getDatabaseName(), tblDesc.getTableName());
    }
  }
  int stmtId = 0;
  /*
  if (isAcid(writeId)) {
    tblDesc.setInitialMmWriteId(writeId);
  }
  */
  if (!replicationSpec.isInReplicationScope()) {
    createRegularImportTasks(tblDesc, partitionDescs, isPartSpecSet, replicationSpec, table,
        fromURI, fs, wh, x, writeId, stmtId, isSourceMm);
  } else {
    createReplImportTasks(tblDesc, partitionDescs, replicationSpec, waitOnPrecursor, table,
        fromURI, fs, wh, x, writeId, stmtId, isSourceMm, updatedMetadata);
  }
  return tableExists;
}
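The MetaData handling at the top of prepareImport follows a small, reusable pattern: read the _metadata file from the dump directory, return early if it carries no table object or a no-op replication spec, and let a caller-supplied name override the database recorded in the dump. Below is a minimal sketch of that pattern, assuming the usual package locations for EximUtil and ReplicationSpec; DumpMetaDataSketch, effectiveDbName, dumpRoot and overrideDbName are illustrative names, not Hive API.

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.parse.EximUtil;
import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
import org.apache.hadoop.hive.ql.parse.repl.load.MetaData;

public class DumpMetaDataSketch {
  // Returns the effective database name for the dump under dumpRoot, or null if there is nothing to import.
  // dumpRoot and overrideDbName are illustrative parameters supplied by the caller.
  public static String effectiveDbName(HiveConf conf, Path dumpRoot, String overrideDbName) throws Exception {
    FileSystem fs = dumpRoot.getFileSystem(conf);
    // _metadata sits directly under the dump directory; EximUtil.METADATA_NAME is its file name.
    MetaData rv = EximUtil.readMetaData(fs, new Path(dumpRoot, EximUtil.METADATA_NAME));
    if (rv.getTable() == null) {
      return null; // nothing to do, mirroring prepareImport's silent return
    }
    ReplicationSpec spec = rv.getReplicationSpec();
    if (spec.isNoop()) {
      return null; // no-op update, skip it
    }
    // Prefer the name supplied on the command line, otherwise the one recorded in the dump.
    return (overrideDbName != null && !overrideDbName.isEmpty())
        ? overrideDbName : rv.getTable().getDbName();
  }
}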
Use of org.apache.hadoop.hive.ql.parse.repl.load.MetaData in project hive by apache: class FSDatabaseEvent, method dbInMetadata.
@Override
public Database dbInMetadata(String dbNameToOverride) throws SemanticException {
  try {
    MetaData rv = EximUtil.readMetaData(fileSystem, dbMetadataFile);
    Database dbObj = rv.getDatabase();
    if (dbObj == null) {
      throw new IllegalArgumentException("_metadata file read did not contain a db object - invalid dump.");
    }
    // override the db name if provided in repl load command
    if (StringUtils.isNotBlank(dbNameToOverride)) {
      dbObj.setName(dbNameToOverride);
    }
    return dbObj;
  } catch (Exception e) {
    throw new SemanticException(e);
  }
}
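Database-level dumps carry a Database object in the same _metadata file, which is what dbInMetadata pulls out before optionally renaming it. A short sketch of that usage, assuming the thrift Database type from org.apache.hadoop.hive.metastore.api; readDatabase and its parameters are illustrative, not part of the Hive API.

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.ql.parse.EximUtil;
import org.apache.hadoop.hive.ql.parse.repl.load.MetaData;

public class DbMetaDataSketch {
  // Reads the Database object from a database-level _metadata file, optionally renaming it.
  public static Database readDatabase(FileSystem fs, Path dbMetadataFile, String newName) throws Exception {
    MetaData rv = EximUtil.readMetaData(fs, dbMetadataFile);
    Database dbObj = rv.getDatabase();   // null for table-level dumps
    if (dbObj == null) {
      throw new IllegalArgumentException("dump does not contain a database object");
    }
    if (newName != null && !newName.trim().isEmpty()) {
      dbObj.setName(newName);            // mirrors the "repl load dbName" override
    }
    return dbObj;
  }
}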
Use of org.apache.hadoop.hive.ql.parse.repl.load.MetaData in project hive by apache: class InsertHandler, method handle.
@Override
public List<Task<?>> handle(Context withinContext) throws SemanticException {
  try {
    FileSystem fs = FileSystem.get(new Path(withinContext.location).toUri(), withinContext.hiveConf);
    MetaData metaData = EximUtil.readMetaData(fs, new Path(withinContext.location, EximUtil.METADATA_NAME));
    ReplicationSpec replicationSpec = metaData.getReplicationSpec();
    if (replicationSpec.isNoop()) {
      return Collections.emptyList();
    }
  } catch (Exception e) {
    LOG.error("failed to load insert event", e);
    throw new SemanticException(e);
  }
  InsertMessage insertMessage = deserializer.getInsertMessage(withinContext.dmd.getPayload());
  String actualDbName = withinContext.isDbNameEmpty() ? insertMessage.getDB() : withinContext.dbName;
  Context currentContext = new Context(withinContext, actualDbName,
      withinContext.getDumpDirectory(), withinContext.getMetricCollector());
  // Piggybacking in Import logic for now
  TableHandler tableHandler = new TableHandler();
  List<Task<?>> tasks = tableHandler.handle(currentContext);
  readEntitySet.addAll(tableHandler.readEntities());
  writeEntitySet.addAll(tableHandler.writeEntities());
  getUpdatedMetadata().copyUpdatedMetadata(tableHandler.getUpdatedMetadata());
  return tasks;
}
Use of org.apache.hadoop.hive.ql.parse.repl.load.MetaData in project hive by apache: class TableHandler, method extract.
private Tuple extract(Context context) throws SemanticException {
  try {
    String tableType = null;
    long writeId = DEFAULT_WRITE_ID;
    switch (context.dmd.getDumpType()) {
      case EVENT_CREATE_TABLE:
      case EVENT_ADD_PARTITION:
        Path metadataPath = new Path(context.location, EximUtil.METADATA_NAME);
        MetaData rv = EximUtil.readMetaData(metadataPath.getFileSystem(context.hiveConf), metadataPath);
        tableType = rv.getTable().getTableType();
        break;
      case EVENT_ALTER_TABLE:
        AlterTableMessage alterTableMessage = deserializer.getAlterTableMessage(context.dmd.getPayload());
        tableType = alterTableMessage.getTableObjAfter().getTableType();
        writeId = alterTableMessage.getWriteId();
        break;
      case EVENT_ALTER_PARTITION:
        AlterPartitionMessage msg = deserializer.getAlterPartitionMessage(context.dmd.getPayload());
        tableType = msg.getTableObj().getTableType();
        writeId = msg.getWriteId();
        break;
      default:
        break;
    }
    boolean isExternalTable = tableType != null
        && TableType.EXTERNAL_TABLE.equals(Enum.valueOf(TableType.class, tableType));
    return new Tuple(isExternalTable, writeId);
  } catch (Exception e) {
    LOG.error("failed to determine if the table associated with the event is external or not", e);
    throw new SemanticException(e);
  }
}
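For EVENT_CREATE_TABLE and EVENT_ADD_PARTITION the table type is taken straight from the _metadata table object, as the string name of a metastore TableType value. A tiny sketch of that check, assuming TableType lives at org.apache.hadoop.hive.metastore.TableType; isExternal is an illustrative helper.

import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.ql.parse.repl.load.MetaData;

public class TableTypeSketch {
  // Decides whether the dumped table is external, given the MetaData already read from _metadata.
  public static boolean isExternal(MetaData rv) {
    String tableType = (rv.getTable() == null) ? null : rv.getTable().getTableType();
    // getTableType() returns the enum name as a string, e.g. "EXTERNAL_TABLE" or "MANAGED_TABLE".
    return tableType != null && TableType.EXTERNAL_TABLE.name().equals(tableType);
  }
}

Comparing against TableType.EXTERNAL_TABLE.name(), as TableHandler.handle does for VIRTUAL_VIEW, avoids the IllegalArgumentException that Enum.valueOf would throw on an unexpected type string.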
Use of org.apache.hadoop.hive.ql.parse.repl.load.MetaData in project hive by apache: class TableHandler, method handle.
@Override
public List<Task<?>> handle(Context context) throws SemanticException {
  try {
    List<Task<?>> importTasks = new ArrayList<>();
    boolean isExternal = false, isLocationSet = false;
    String parsedLocation = null;
    DumpType eventType = context.dmd.getDumpType();
    Tuple tuple = extract(context);
    MetaData rv = EximUtil.getMetaDataFromLocation(context.location, context.hiveConf);
    if (tuple.isExternalTable) {
      isLocationSet = true;
      isExternal = true;
      Table table = new Table(rv.getTable());
      parsedLocation = ReplExternalTables.externalTableLocation(context.hiveConf, table.getSd().getLocation());
    }
    context.nestedContext.setConf(context.hiveConf);
    EximUtil.SemanticAnalyzerWrapperContext x = new EximUtil.SemanticAnalyzerWrapperContext(context.hiveConf,
        context.db, readEntitySet, writeEntitySet, importTasks, context.log, context.nestedContext);
    x.setEventType(eventType);
    // REPL LOAD is not partition level. It is always DB or table level. So, passing null for partition specs.
    if (TableType.VIRTUAL_VIEW.name().equals(rv.getTable().getTableType())) {
      importTasks.add(ReplLoadTask.createViewTask(rv, context.dbName, context.hiveConf,
          context.getDumpDirectory(), context.getMetricCollector()));
    } else {
      ImportSemanticAnalyzer.prepareImport(false, isLocationSet, isExternal, false, (context.precursor != null),
          parsedLocation, null, context.dbName, null, context.location, x, updatedMetadata, context.getTxnMgr(),
          tuple.writeId, rv, context.getDumpDirectory(), context.getMetricCollector());
    }
    Task<?> openTxnTask = x.getOpenTxnTask();
    if (openTxnTask != null && !importTasks.isEmpty()) {
      for (Task<?> t : importTasks) {
        openTxnTask.addDependentTask(t);
      }
      importTasks.add(openTxnTask);
    }
    return importTasks;
  } catch (RuntimeException e) {
    throw e;
  } catch (Exception e) {
    throw new SemanticException(e);
  }
}
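When the extracted tuple marks the table as external, handle reads the dump's MetaData via EximUtil.getMetaDataFromLocation and remaps the source data location with ReplExternalTables.externalTableLocation before delegating to prepareImport. A hedged sketch of just that step, assuming ReplExternalTables sits in org.apache.hadoop.hive.ql.exec.repl and that the ql-level Table wraps the thrift table object from the dump; targetLocation is an illustrative helper, not Hive API.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.repl.ReplExternalTables;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.EximUtil;
import org.apache.hadoop.hive.ql.parse.repl.load.MetaData;

public class ExternalLocationSketch {
  // Given an event dump location, work out where an external table's data should land on the target cluster.
  public static String targetLocation(String dumpLocation, HiveConf conf) throws Exception {
    MetaData rv = EximUtil.getMetaDataFromLocation(dumpLocation, conf);
    Table table = new Table(rv.getTable());  // wrap the thrift table object in the ql-level Table
    // Remap the source location under the external-table base directory configured for replication.
    return ReplExternalTables.externalTableLocation(conf, table.getSd().getLocation());
  }
}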