
Example 6 with AlterTableAddPartitionDesc

Use of org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc in project hive by apache.

The class ImportSemanticAnalyzer, method getBaseAddPartitionDescFromPartition.

private static AlterTableAddPartitionDesc getBaseAddPartitionDescFromPartition(Path fromPath, String dbName,
        ImportTableDesc tblDesc, Partition partition, ReplicationSpec replicationSpec, HiveConf conf)
        throws MetaException, SemanticException {
    Map<String, String> partitionSpec = EximUtil.makePartSpec(tblDesc.getPartCols(), partition.getValues());
    StorageDescriptor sd = partition.getSd();
    String location = null;
    if (replicationSpec.isInReplicationScope() && tblDesc.isExternal()) {
        location = ReplExternalTables.externalTableLocation(conf, partition.getSd().getLocation());
        LOG.debug("partition {} has data location: {}", partition, location);
    } else {
        location = new Path(fromPath, Warehouse.makePartName(tblDesc.getPartCols(), partition.getValues())).toString();
    }
    long writeId = -1;
    if (tblDesc.getReplWriteId() != null) {
        writeId = tblDesc.getReplWriteId();
    }
    AlterTableAddPartitionDesc.PartitionDesc partitionDesc = new AlterTableAddPartitionDesc.PartitionDesc(
            partitionSpec, location, partition.getParameters(), sd.getInputFormat(), sd.getOutputFormat(),
            sd.getNumBuckets(), sd.getCols(), sd.getSerdeInfo().getSerializationLib(),
            sd.getSerdeInfo().getParameters(), sd.getBucketCols(), sd.getSortCols(), null, writeId);
    return new AlterTableAddPartitionDesc(dbName, tblDesc.getTableName(), true, ImmutableList.of(partitionDesc));
}
Also used : Path(org.apache.hadoop.fs.Path) AlterTableAddPartitionDesc(org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor)
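
For reference, here is a minimal sketch of the same two-level structure built by hand rather than from a dumped Partition: one AlterTableAddPartitionDesc.PartitionDesc wrapped into an AlterTableAddPartitionDesc via the constructors shown above. The database/table names, partition values, and location are made up, and passing null/-1 for the storage and statistics fields is purely an illustrative assumption.

import java.util.Collections;
import java.util.Map;

import com.google.common.collect.ImmutableList;

import org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc;

public class AddPartitionDescSketch {

    static AlterTableAddPartitionDesc singlePartitionDesc() {
        // hypothetical partition spec: ds=2021-01-01
        Map<String, String> partitionSpec = Collections.singletonMap("ds", "2021-01-01");
        AlterTableAddPartitionDesc.PartitionDesc partitionDesc = new AlterTableAddPartitionDesc.PartitionDesc(
                partitionSpec,
                "/warehouse/demo_db/demo_table/ds=2021-01-01", // made-up data location
                null,       // partition parameters (copied from the dump in the method above)
                null, null, // input/output format (taken from the StorageDescriptor above)
                -1,         // number of buckets
                null,       // columns
                null,       // serialization library
                null,       // serde parameters
                null, null, // bucket and sort columns
                null,       // column statistics (also null in the method above)
                -1L);       // writeId: -1 when no replication write id is present, as above
        // same flag value (true) as in the call made by getBaseAddPartitionDescFromPartition
        return new AlterTableAddPartitionDesc("demo_db", "demo_table", true, ImmutableList.of(partitionDesc));
    }
}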

Example 7 with AlterTableAddPartitionDesc

Use of org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc in project hive by apache.

The class ImportSemanticAnalyzer, method prepareImport.

public static boolean prepareImport(boolean isImportCmd, boolean isLocationSet, boolean isExternalSet,
        boolean isPartSpecSet, boolean waitOnPrecursor, String parsedLocation, String parsedTableName,
        String overrideDBName, LinkedHashMap<String, String> parsedPartSpec, String fromLocn,
        EximUtil.SemanticAnalyzerWrapperContext x, UpdatedMetaDataTracker updatedMetadata, HiveTxnManager txnMgr,
        // Initialize with 0 for non-ACID and non-MM tables.
        long writeId, MetaData rv, String dumpRoot, ReplicationMetricCollector metricCollector)
        throws IOException, MetaException, HiveException, URISyntaxException {
    // initialize load path
    URI fromURI = EximUtil.getValidatedURI(x.getConf(), stripQuotes(fromLocn));
    Path fromPath = new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI.getPath());
    FileSystem fs = FileSystem.get(fromURI, x.getConf());
    x.getInputs().add(toReadEntity(fromPath, x.getConf()));
    if (rv.getTable() == null) {
        // nothing to do here, silently return.
        return false;
    }
    ReplicationSpec replicationSpec = rv.getReplicationSpec();
    if (replicationSpec.isNoop()) {
        // nothing to do here, silently return.
        x.getLOG().debug("Current update with ID:{} is noop", replicationSpec.getCurrentReplicationState());
        return false;
    }
    if (isImportCmd) {
        replicationSpec.setReplSpecType(ReplicationSpec.Type.IMPORT);
    }
    String dbname = rv.getTable().getDbName();
    if ((overrideDBName != null) && (!overrideDBName.isEmpty())) {
        // If the parsed statement contained a db.tablename specification, prefer that.
        dbname = overrideDBName;
    }
    // Create table associated with the import
    // Executed if relevant, and used to contain all the other details about the table if not.
    ImportTableDesc tblDesc;
    org.apache.hadoop.hive.metastore.api.Table tblObj = rv.getTable();
    try {
        tblDesc = getBaseCreateTableDescFromTable(dbname, tblObj);
    } catch (Exception e) {
        throw new HiveException(e);
    }
    boolean inReplicationScope = false;
    if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
        tblDesc.setReplicationSpec(replicationSpec);
        inReplicationScope = true;
        tblDesc.setReplWriteId(writeId);
        tblDesc.setOwnerName(tblObj.getOwner());
    }
    if (isExternalSet) {
        tblDesc.setExternal(isExternalSet);
    // This condition-check could have been avoided, but to honour the old
    // default of not calling if it wasn't set, we retain that behaviour.
    // TODO:cleanup after verification that the outer if isn't really needed here
    }
    if (isLocationSet) {
        STATIC_LOG.debug("table {} location is {}", tblDesc.getTableName(), parsedLocation);
        tblDesc.setLocation(parsedLocation);
        x.getInputs().add(toReadEntity(new Path(parsedLocation), x.getConf()));
    }
    if (StringUtils.isNotBlank(parsedTableName)) {
        tblDesc.setTableName(TableName.fromString(parsedTableName, null, dbname));
    }
    if (tblDesc.getTableName() == null) {
        // Either we got the tablename from the IMPORT statement (first priority) or from the export dump.
        throw new SemanticException(ErrorMsg.NEED_TABLE_SPECIFICATION.getMsg());
    } else {
        x.getConf().set("import.destination.table", tblDesc.getTableName());
    }
    List<AlterTableAddPartitionDesc> partitionDescs = new ArrayList<>();
    Iterable<Partition> partitions = rv.getPartitions();
    for (Partition partition : partitions) {
        // TODO: this should ideally not create AddPartitionDesc per partition
        AlterTableAddPartitionDesc partsDesc = getBaseAddPartitionDescFromPartition(fromPath, dbname, tblDesc, partition, replicationSpec, x.getConf());
        partitionDescs.add(partsDesc);
    }
    if (isPartSpecSet) {
        // The import specification asked for only a particular partition to be loaded
        // We load only that, and ignore all the others.
        boolean found = false;
        for (Iterator<AlterTableAddPartitionDesc> partnIter = partitionDescs.listIterator(); partnIter.hasNext(); ) {
            AlterTableAddPartitionDesc addPartitionDesc = partnIter.next();
            if (!found && addPartitionDesc.getPartitions().get(0).getPartSpec().equals(parsedPartSpec)) {
                found = true;
            } else {
                partnIter.remove();
            }
        }
        if (!found) {
            throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(" - Specified partition not found in import directory"));
        }
    }
    Warehouse wh = new Warehouse(x.getConf());
    Table table = tableIfExists(tblDesc, x.getHive());
    boolean tableExists = false;
    if (table != null) {
        checkTable(table, tblDesc, replicationSpec, x.getConf());
        x.getLOG().debug("table " + tblDesc.getTableName() + " exists: metadata checked");
        tableExists = true;
    }
    if (!tableExists && isExternalSet) {
        // If the user is explicitly importing a new external table, clear txn flags from the spec.
        AcidUtils.setNonTransactional(tblDesc.getTblProps());
    }
    int stmtId = 0;
    if (!replicationSpec.isInReplicationScope()
            && ((tableExists && AcidUtils.isTransactionalTable(table))
                || (!tableExists && AcidUtils.isTablePropertyTransactional(tblDesc.getTblProps())))) {
        // In replication flow, no need to allocate write id. It will be allocated using the alloc write id event.
        if (x.getCtx().getExplainConfig() == null && !inReplicationScope) {
            writeId = txnMgr.getTableWriteId(tblDesc.getDatabaseName(), tblDesc.getTableName());
            stmtId = txnMgr.getStmtIdAndIncrement();
        }
    }
    if (inReplicationScope) {
        createReplImportTasks(tblDesc, partitionDescs, replicationSpec, waitOnPrecursor, table, fromURI, wh, x, writeId, stmtId, updatedMetadata, dumpRoot, metricCollector);
    } else {
        createRegularImportTasks(tblDesc, partitionDescs, isPartSpecSet, replicationSpec, table, fromURI, fs, wh, x, writeId, stmtId);
    }
    return tableExists;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.metastore.api.Partition) AlterTableAddPartitionDesc(org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Table(org.apache.hadoop.hive.ql.metadata.Table) ImportTableDesc(org.apache.hadoop.hive.ql.plan.ImportTableDesc) ArrayList(java.util.ArrayList) URI(java.net.URI) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) IOException(java.io.IOException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) FileSystem(org.apache.hadoop.fs.FileSystem)
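
As a hedged illustration of the isPartSpecSet branch above, the partition-spec filter can be pulled out into a small helper. It assumes, as the loop in prepareImport does, that each AlterTableAddPartitionDesc produced by getBaseAddPartitionDescFromPartition carries exactly one PartitionDesc; the class and method names are invented for this sketch, and the error text is simplified.

import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc;
import org.apache.hadoop.hive.ql.parse.SemanticException;

final class PartitionSpecFilter {

    /** Keeps only the descriptor whose partition spec matches the one named in the IMPORT statement. */
    static void retainRequestedPartition(List<AlterTableAddPartitionDesc> partitionDescs,
            Map<String, String> parsedPartSpec) throws SemanticException {
        boolean found = false;
        for (Iterator<AlterTableAddPartitionDesc> it = partitionDescs.iterator(); it.hasNext(); ) {
            AlterTableAddPartitionDesc desc = it.next();
            if (!found && desc.getPartitions().get(0).getPartSpec().equals(parsedPartSpec)) {
                found = true; // first (and only) match is kept
            } else {
                it.remove();  // drop every other partition descriptor
            }
        }
        if (!found) {
            throw new SemanticException("Specified partition not found in import directory");
        }
    }
}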

Example 8 with AlterTableAddPartitionDesc

Use of org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc in project hive by apache.

The class ImportSemanticAnalyzer, method createRegularImportTasks.

/**
 * Create tasks for a regular (non-replication) import.
 * @param tblDesc descriptor of the table being imported
 * @param partitionDescs add-partition descriptors built from the export dump
 * @param isPartSpecSet whether the IMPORT statement named a specific partition
 * @param replicationSpec replication settings associated with the dump
 * @param table the existing target table, or null if it does not exist yet
 * @param fromURI location of the export dump being imported
 * @param fs filesystem of the export dump
 * @param wh warehouse helper used to resolve default table paths
 */
private static void createRegularImportTasks(ImportTableDesc tblDesc, List<AlterTableAddPartitionDesc> partitionDescs,
        boolean isPartSpecSet, ReplicationSpec replicationSpec, Table table, URI fromURI, FileSystem fs, Warehouse wh,
        EximUtil.SemanticAnalyzerWrapperContext x, Long writeId, int stmtId)
        throws HiveException, IOException, MetaException {
    if (table != null) {
        if (table.isPartitioned()) {
            x.getLOG().debug("table partitioned");
            for (AlterTableAddPartitionDesc addPartitionDesc : partitionDescs) {
                Map<String, String> partSpec = addPartitionDesc.getPartitions().get(0).getPartSpec();
                org.apache.hadoop.hive.ql.metadata.Partition ptn = null;
                if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) {
                    x.getTasks().add(addSinglePartition(tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId, false, null, null));
                } else {
                    throw new SemanticException(ErrorMsg.PARTITION_EXISTS.getMsg(partSpecToString(partSpec)));
                }
            }
        } else {
            x.getLOG().debug("table non-partitioned");
            // ensure the destination is empty; this check is done only for a regular import
            Path tgtPath = new Path(table.getDataLocation().toString());
            FileSystem tgtFs = FileSystem.get(tgtPath.toUri(), x.getConf());
            checkTargetLocationEmpty(tgtFs, tgtPath, replicationSpec, x.getLOG());
            loadTable(fromURI, table, false, tgtPath, replicationSpec, x, writeId, stmtId);
        }
        // Set this to read because we can't overwrite any existing partitions
        x.getOutputs().add(new WriteEntity(table, WriteEntity.WriteType.DDL_NO_LOCK));
    } else {
        x.getLOG().debug("table " + tblDesc.getTableName() + " does not exist");
        Task<?> t = createTableTask(tblDesc, x);
        table = createNewTableMetadataObject(tblDesc, false);
        Database parentDb = x.getHive().getDatabase(tblDesc.getDatabaseName());
        // Since we are going to be creating a new table in a db, we should mark that db as a write entity
        // so that the auth framework can go to work there.
        x.getOutputs().add(new WriteEntity(parentDb, WriteEntity.WriteType.DDL_SHARED));
        if (isPartitioned(tblDesc)) {
            for (AlterTableAddPartitionDesc addPartitionDesc : partitionDescs) {
                t.addDependentTask(addSinglePartition(tblDesc, table, wh, addPartitionDesc, replicationSpec, x, writeId, stmtId, false, null, null));
            }
        } else {
            x.getLOG().debug("adding dependent CopyWork/MoveWork for table");
            if (tblDesc.isExternal() && (tblDesc.getLocation() == null)) {
                x.getLOG().debug("Importing in place, no emptiness check, no copying/loading");
                Path dataPath = new Path(fromURI.toString(), EximUtil.DATA_PATH_NAME);
                tblDesc.setLocation(dataPath.toString());
            } else {
                Path tablePath = null;
                if (tblDesc.getLocation() != null) {
                    tablePath = new Path(tblDesc.getLocation());
                } else {
                    tablePath = wh.getDefaultTablePath(parentDb, tblDesc.getTableName(), tblDesc.isExternal());
                }
                FileSystem tgtFs = FileSystem.get(tablePath.toUri(), x.getConf());
                checkTargetLocationEmpty(tgtFs, tablePath, replicationSpec, x.getLOG());
                t.addDependentTask(loadTable(fromURI, table, false, tablePath, replicationSpec, x, writeId, stmtId));
            }
        }
        x.getTasks().add(t);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) AlterTableAddPartitionDesc(org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc) FileSystem(org.apache.hadoop.fs.FileSystem) Database(org.apache.hadoop.hive.metastore.api.Database) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
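
The partitioned-table branch above schedules addSinglePartition only when x.getHive().getPartition(...) returns null. Below is a minimal sketch of that existence check; the helper class and method names are invented, and the one-PartitionDesc-per-descriptor assumption from the previous examples is carried over.

import java.util.Map;

import org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;

final class PartitionExistenceCheck {

    /** Returns true if the partition described by desc is not yet present in table. */
    static boolean needsImport(Hive db, Table table, AlterTableAddPartitionDesc desc) throws HiveException {
        Map<String, String> partSpec = desc.getPartitions().get(0).getPartSpec();
        // third argument false: do not create the partition if it is missing, just look it up
        return db.getPartition(table, partSpec, false) == null;
    }
}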

Example 9 with AlterTableAddPartitionDesc

Use of org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc in project hive by apache.

The class LoadPartitions, method addTasksForPartition.

/**
 * Adds copy tasks and the add-partition metadata task for all partitions in a batch.
 * Returns false if the task tracker ran out of capacity and the load must resume in the next cycle.
 */
private boolean addTasksForPartition(Table table, AlterTableAddPartitionDesc addPartitionDesc, AlterTableAddPartitionDesc.PartitionDesc lastPartSpec) throws MetaException, HiveException {
    Task<?> addPartTask = TaskFactory.get(
            new DDLWork(new HashSet<>(), new HashSet<>(), addPartitionDesc, true,
                    (new Path(context.dumpDirectory)).getParent().toString(), this.metricCollector),
            context.hiveConf);
    // the checkpointing task was already added as part of adding the batch of partitions
    if (isMetaDataOp() || TableType.EXTERNAL_TABLE.equals(table.getTableType())) {
        tracker.addTask(addPartTask);
        return true;
    }
    // Add Copy task for all partitions
    boolean lastProcessedStageFound = false;
    for (AlterTableAddPartitionDesc.PartitionDesc partSpec : addPartitionDesc.getPartitions()) {
        if (!tracker.canAddMoreTasks()) {
            // record in the replication state the partition and COPY stage from which the next run should resume
            ReplicationState currentReplicationState = new ReplicationState(new PartitionState(table.getTableName(), addPartitionDesc, partSpec, PartitionState.Stage.COPY));
            updateReplicationState(currentReplicationState);
            return false;
        }
        Path replicaWarehousePartitionLocation = locationOnReplicaWarehouse(table, partSpec);
        partSpec.setLocation(replicaWarehousePartitionLocation.toString());
        LOG.debug("adding dependent CopyWork for partition " + partSpecToString(partSpec.getPartSpec()) + " with source location: " + partSpec.getLocation());
        if (!lastProcessedStageFound && lastPartSpec != null && lastPartSpec.getLocation() != partSpec.getLocation()) {
            // Don't process copy task if already processed as part of previous run
            continue;
        }
        lastProcessedStageFound = true;
        boolean copyAtLoad = context.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET);
        Task<?> copyTask = ReplCopyTask.getLoadCopyTask(event.replicationSpec(),
                new Path(event.dataPath() + Path.SEPARATOR + Warehouse.makePartPath(partSpec.getPartSpec())),
                replicaWarehousePartitionLocation, context.hiveConf, copyAtLoad, false,
                (new Path(context.dumpDirectory)).getParent().toString(), this.metricCollector);
        tracker.addTask(copyTask);
    }
    // add partition metadata task once all the copy tasks are added
    tracker.addDependentTask(addPartTask);
    return true;
}
Also used : Path(org.apache.hadoop.fs.Path) AlterTableAddPartitionDesc(org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) PartitionState(org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.ReplicationState.PartitionState) ReplicationState(org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.ReplicationState) HashSet(java.util.HashSet)
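
A rough sketch of the core of addTasksForPartition: rewrite each PartitionDesc location, then turn the descriptor into a DDL task. The table root path and class name are made up, and the three-argument DDLWork constructor is an assumption; the method above uses a longer variant that also carries the dump directory and a replication metric collector.

import java.util.HashSet;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.ql.ddl.DDLWork;
import org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;

final class AddPartitionTaskSketch {

    static Task<?> toAddPartitionTask(AlterTableAddPartitionDesc addPartitionDesc, Path tableRoot,
            HiveConf conf) throws MetaException {
        for (AlterTableAddPartitionDesc.PartitionDesc partSpec : addPartitionDesc.getPartitions()) {
            // place each partition under <tableRoot>/<k1=v1/k2=v2/...>, similar in spirit to
            // locationOnReplicaWarehouse in the snippet above
            Path location = new Path(tableRoot, Warehouse.makePartPath(partSpec.getPartSpec()));
            partSpec.setLocation(location.toString());
        }
        // assumed plain (inputs, outputs, desc) DDLWork constructor; see the lead-in note
        DDLWork work = new DDLWork(new HashSet<>(), new HashSet<>(), addPartitionDesc);
        return TaskFactory.get(work, conf);
    }
}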

Example 10 with AlterTableAddPartitionDesc

Use of org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc in project hive by apache.

The class LoadPartitions, method tasks.

public TaskTracker tasks() throws Exception {
    /*
     * Resolve and set the table location up front; this is done in both LoadTable and LoadPartitions.
     */
    Database parentDb = context.hiveDb.getDatabase(tableDesc.getDatabaseName());
    LoadTable.TableLocationTuple tableLocationTuple = LoadTable.tableLocation(tableDesc, parentDb, tableContext, context);
    tableDesc.setLocation(tableLocationTuple.location);
    if (table == null) {
        // new table
        table = tableDesc.toTable(context.hiveConf);
        if (isPartitioned(tableDesc)) {
            updateReplicationState(initialReplicationState());
            if (!forNewTable().hasReplicationState()) {
                // Add ReplStateLogTask only if no pending table load tasks left for next cycle
                Task<?> replLogTask = ReplUtils.getTableReplLogTask(tableDesc, replLogger, context.hiveConf, metricCollector, (new Path(context.dumpDirectory)).getParent().toString());
                tracker.addDependentTask(replLogTask);
            }
            return tracker;
        }
    } else {
        // existing
        if (table.isPartitioned()) {
            List<AlterTableAddPartitionDesc> partitionDescs = event.partitionDescriptions(tableDesc);
            if (!event.replicationSpec().isMetadataOnly() && !partitionDescs.isEmpty()) {
                updateReplicationState(initialReplicationState());
                if (!forExistingTable(lastReplicatedPartition).hasReplicationState()) {
                    // Add ReplStateLogTask only if no pending table load tasks left for next cycle
                    Task<?> replLogTask = ReplUtils.getTableReplLogTask(tableDesc, replLogger, context.hiveConf, metricCollector, (new Path(context.dumpDirectory)).getParent().toString());
                    tracker.addDependentTask(replLogTask);
                }
                return tracker;
            }
        }
    }
    return tracker;
}
Also used : Path(org.apache.hadoop.fs.Path) AlterTableAddPartitionDesc(org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc) Database(org.apache.hadoop.hive.metastore.api.Database)

Aggregations

AlterTableAddPartitionDesc (org.apache.hadoop.hive.ql.ddl.table.partition.add.AlterTableAddPartitionDesc) 11
Path (org.apache.hadoop.fs.Path) 10
Database (org.apache.hadoop.hive.metastore.api.Database) 4
ArrayList (java.util.ArrayList) 2
FileSystem (org.apache.hadoop.fs.FileSystem) 2
MetaException (org.apache.hadoop.hive.metastore.api.MetaException) 2
Partition (org.apache.hadoop.hive.metastore.api.Partition) 2
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor) 2
DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork) 2
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity) 2
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 2
ImportSemanticAnalyzer.partSpecToString (org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer.partSpecToString) 2
IOException (java.io.IOException) 1
URI (java.net.URI) 1
URISyntaxException (java.net.URISyntaxException) 1
HashMap (java.util.HashMap) 1
HashSet (java.util.HashSet) 1
LinkedList (java.util.LinkedList) 1
Warehouse (org.apache.hadoop.hive.metastore.Warehouse) 1
ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics) 1