
Example 6 with AddPartitionDesc

Use of org.apache.hadoop.hive.ql.plan.AddPartitionDesc in project hive by apache.

From the class ImportSemanticAnalyzer, method prepareImport:

public static boolean prepareImport(boolean isLocationSet, boolean isExternalSet, boolean isPartSpecSet, boolean waitOnPrecursor, String parsedLocation, String parsedTableName, String parsedDbName, LinkedHashMap<String, String> parsedPartSpec, String fromLocn, EximUtil.SemanticAnalyzerWrapperContext x, Map<String, Long> dbsUpdated, Map<String, Long> tablesUpdated) throws IOException, MetaException, HiveException, URISyntaxException {
    // initialize load path
    URI fromURI = EximUtil.getValidatedURI(x.getConf(), stripQuotes(fromLocn));
    Path fromPath = new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI.getPath());
    FileSystem fs = FileSystem.get(fromURI, x.getConf());
    x.getInputs().add(toReadEntity(fromPath, x.getConf()));
    EximUtil.ReadMetaData rv = new EximUtil.ReadMetaData();
    try {
        rv = EximUtil.readMetaData(fs, new Path(fromPath, EximUtil.METADATA_NAME));
    } catch (IOException e) {
        throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
    }
    ReplicationSpec replicationSpec = rv.getReplicationSpec();
    if (replicationSpec.isNoop()) {
        // nothing to do here, silently return.
        return false;
    }
    String dbname = SessionState.get().getCurrentDatabase();
    if ((parsedDbName != null) && (!parsedDbName.isEmpty())) {
        // If the parsed statement contained a db.tablename specification, prefer that.
        dbname = parsedDbName;
    }
    if (dbsUpdated != null) {
        dbsUpdated.put(dbname, Long.valueOf(replicationSpec.get(ReplicationSpec.KEY.EVENT_ID)));
    }
    // Create table associated with the import
    // Executed if relevant, and used to contain all the other details about the table if not.
    ImportTableDesc tblDesc;
    try {
        tblDesc = getBaseCreateTableDescFromTable(dbname, rv.getTable());
    } catch (Exception e) {
        throw new HiveException(e);
    }
    if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
        tblDesc.setReplicationSpec(replicationSpec);
    }
    if (isExternalSet) {
        tblDesc.setExternal(isExternalSet);
        // This condition-check could have been avoided, but to honour the old
        // default of not calling if it wasn't set, we retain that behaviour.
        // TODO: cleanup after verification that the outer if isn't really needed here
    }
    if (isLocationSet) {
        tblDesc.setLocation(parsedLocation);
        x.getInputs().add(toReadEntity(new Path(parsedLocation), x.getConf()));
    }
    if ((parsedTableName != null) && (!parsedTableName.isEmpty())) {
        tblDesc.setTableName(parsedTableName);
    }
    if (tablesUpdated != null) {
        tablesUpdated.put(dbname + "." + tblDesc.getTableName(), Long.valueOf(replicationSpec.get(ReplicationSpec.KEY.EVENT_ID)));
    }
    List<AddPartitionDesc> partitionDescs = new ArrayList<AddPartitionDesc>();
    Iterable<Partition> partitions = rv.getPartitions();
    for (Partition partition : partitions) {
        // TODO: this should ideally not create AddPartitionDesc per partition
        AddPartitionDesc partsDesc = getBaseAddPartitionDescFromPartition(fromPath, dbname, tblDesc, partition);
        partitionDescs.add(partsDesc);
    }
    if (isPartSpecSet) {
        // The import specification asked for only a particular partition to be loaded
        // We load only that, and ignore all the others.
        boolean found = false;
        for (Iterator<AddPartitionDesc> partnIter = partitionDescs.listIterator(); partnIter.hasNext(); ) {
            AddPartitionDesc addPartitionDesc = partnIter.next();
            if (!found && addPartitionDesc.getPartition(0).getPartSpec().equals(parsedPartSpec)) {
                found = true;
            } else {
                partnIter.remove();
            }
        }
        if (!found) {
            throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(" - Specified partition not found in import directory"));
        }
    }
    if (tblDesc.getTableName() == null) {
        // Either we got the table name from the IMPORT statement (first priority),
        // or from the export dump.
        throw new SemanticException(ErrorMsg.NEED_TABLE_SPECIFICATION.getMsg());
    } else {
        x.getConf().set("import.destination.table", tblDesc.getTableName());
        for (AddPartitionDesc addPartitionDesc : partitionDescs) {
            addPartitionDesc.setTableName(tblDesc.getTableName());
        }
    }
    Warehouse wh = new Warehouse(x.getConf());
    Table table = tableIfExists(tblDesc, x.getHive());
    boolean tableExists = false;
    if (table != null) {
        checkTable(table, tblDesc, replicationSpec, x.getConf());
        x.getLOG().debug("table " + tblDesc.getTableName() + " exists: metadata checked");
        tableExists = true;
    }
    if (!replicationSpec.isInReplicationScope()) {
        createRegularImportTasks(tblDesc, partitionDescs, isPartSpecSet, replicationSpec, table, fromURI, fs, wh, x);
    } else {
        createReplImportTasks(tblDesc, partitionDescs, isPartSpecSet, replicationSpec, waitOnPrecursor, table, fromURI, fs, wh, x);
    }
    return tableExists;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.metastore.api.Partition) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Table(org.apache.hadoop.hive.ql.metadata.Table) ImportTableDesc(org.apache.hadoop.hive.ql.plan.ImportTableDesc) ArrayList(java.util.ArrayList) IOException(java.io.IOException) URI(java.net.URI) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) FileSystem(org.apache.hadoop.fs.FileSystem) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc)
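
The most subtle part of prepareImport is the isPartSpecSet branch: it walks the list of AddPartitionDesc objects built from the dump and retains only the one whose first partition spec matches the spec parsed from the IMPORT statement. Below is a minimal, self-contained sketch of that filtering pattern, using only the getPartition(0).getPartSpec() accessor visible above; retainRequestedPartition is a hypothetical helper name, not part of Hive.

import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;

public class PartitionFilterSketch {

    /**
     * Keeps only the descriptor whose first partition spec equals the requested
     * spec, mirroring the isPartSpecSet branch of prepareImport above.
     * @return true if a matching descriptor was found and retained
     */
    static boolean retainRequestedPartition(List<AddPartitionDesc> partitionDescs, Map<String, String> requestedSpec) {
        boolean found = false;
        for (Iterator<AddPartitionDesc> it = partitionDescs.iterator(); it.hasNext(); ) {
            AddPartitionDesc desc = it.next();
            if (!found && desc.getPartition(0).getPartSpec().equals(requestedSpec)) {
                // keep the first match only
                found = true;
            } else {
                // drop every non-matching descriptor in place
                it.remove();
            }
        }
        return found;
    }
}

Removing through the iterator prunes the list in place without risking a ConcurrentModificationException, which is why prepareImport does the same rather than building a second list.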

Example 7 with AddPartitionDesc

Use of org.apache.hadoop.hive.ql.plan.AddPartitionDesc in project hive by apache.

From the class ImportSemanticAnalyzer, method createRegularImportTasks:

/**
   * Create tasks for a regular (non-replication) import.
   * @param tblDesc descriptor of the table being imported
   * @param partitionDescs add-partition descriptors built from the export dump
   * @param isPartSpecSet whether the IMPORT statement named a specific partition
   * @param replicationSpec replication settings carried through to the copy/load tasks
   * @param table the existing target table, or null if it does not yet exist
   * @param fromURI root URI of the export dump
   * @param fs filesystem of the export dump
   * @param wh warehouse used to resolve the default table path
   * @param x wrapper holding the conf, Hive object, logger, and input/output/task lists
   */
private static void createRegularImportTasks(ImportTableDesc tblDesc, List<AddPartitionDesc> partitionDescs, boolean isPartSpecSet, ReplicationSpec replicationSpec, Table table, URI fromURI, FileSystem fs, Warehouse wh, EximUtil.SemanticAnalyzerWrapperContext x) throws HiveException, URISyntaxException, IOException, MetaException {
    if (table != null) {
        if (table.isPartitioned()) {
            x.getLOG().debug("table partitioned");
            for (AddPartitionDesc addPartitionDesc : partitionDescs) {
                Map<String, String> partSpec = addPartitionDesc.getPartition(0).getPartSpec();
                org.apache.hadoop.hive.ql.metadata.Partition ptn = null;
                if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) {
                    x.getTasks().add(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
                } else {
                    throw new SemanticException(ErrorMsg.PARTITION_EXISTS.getMsg(partSpecToString(partSpec)));
                }
            }
        } else {
            x.getLOG().debug("table non-partitioned");
            // ensure the destination is empty; this check applies only to regular (non-repl) import
            Path tgtPath = new Path(table.getDataLocation().toString());
            FileSystem tgtFs = FileSystem.get(tgtPath.toUri(), x.getConf());
            checkTargetLocationEmpty(tgtFs, tgtPath, replicationSpec, x);
            loadTable(fromURI, table, false, tgtPath, replicationSpec, x);
        }
        // Set this to read because we can't overwrite any existing partitions
        x.getOutputs().add(new WriteEntity(table, WriteEntity.WriteType.DDL_NO_LOCK));
    } else {
        x.getLOG().debug("table " + tblDesc.getTableName() + " does not exist");
        Task<?> t = createTableTask(tblDesc, x);
        table = new Table(tblDesc.getDatabaseName(), tblDesc.getTableName());
        Database parentDb = x.getHive().getDatabase(tblDesc.getDatabaseName());
        // Since we are going to be creating a new table in a db, we should mark that db as a write entity
        // so that the auth framework can go to work there.
        x.getOutputs().add(new WriteEntity(parentDb, WriteEntity.WriteType.DDL_SHARED));
        if (isPartitioned(tblDesc)) {
            for (AddPartitionDesc addPartitionDesc : partitionDescs) {
                t.addDependentTask(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
            }
        } else {
            x.getLOG().debug("adding dependent CopyWork/MoveWork for table");
            if (tblDesc.isExternal() && (tblDesc.getLocation() == null)) {
                x.getLOG().debug("Importing in place, no emptiness check, no copying/loading");
                Path dataPath = new Path(fromURI.toString(), EximUtil.DATA_PATH_NAME);
                tblDesc.setLocation(dataPath.toString());
            } else {
                Path tablePath = null;
                if (tblDesc.getLocation() != null) {
                    tablePath = new Path(tblDesc.getLocation());
                } else {
                    tablePath = wh.getTablePath(parentDb, tblDesc.getTableName());
                }
                FileSystem tgtFs = FileSystem.get(tablePath.toUri(), x.getConf());
                checkTargetLocationEmpty(tgtFs, tablePath, replicationSpec, x);
                t.addDependentTask(loadTable(fromURI, table, false, tablePath, replicationSpec, x));
            }
        }
        x.getTasks().add(t);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Table(org.apache.hadoop.hive.ql.metadata.Table) FileSystem(org.apache.hadoop.fs.FileSystem) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc) Database(org.apache.hadoop.hive.metastore.api.Database) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
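
When the target table already exists and is partitioned, the loop above looks up each descriptor's partition with getPartition(table, partSpec, false) and fails fast with PARTITION_EXISTS on a conflict. The sketch below reworks the same lookup as a filter instead of an error; it assumes only the Hive.getPartition call shown in the snippet, and missingPartitions is a hypothetical helper name.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;

public class ExistingPartitionCheckSketch {

    /**
     * Returns the descriptors whose partitions do not yet exist on the target
     * table, mirroring the per-partition check in createRegularImportTasks
     * (which throws PARTITION_EXISTS on a conflict instead of filtering).
     */
    static List<AddPartitionDesc> missingPartitions(Hive db, Table table, List<AddPartitionDesc> descs) throws HiveException {
        List<AddPartitionDesc> missing = new ArrayList<>();
        for (AddPartitionDesc desc : descs) {
            Map<String, String> partSpec = desc.getPartition(0).getPartSpec();
            // the third argument (forceCreate = false) makes this a pure lookup
            Partition existing = db.getPartition(table, partSpec, false);
            if (existing == null) {
                missing.add(desc);
            }
        }
        return missing;
    }
}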

Example 8 with AddPartitionDesc

Use of org.apache.hadoop.hive.ql.plan.AddPartitionDesc in project hive by apache.

From the class ImportSemanticAnalyzer, method createReplImportTasks:

/**
   * Create tasks for repl import
   */
private static void createReplImportTasks(ImportTableDesc tblDesc, List<AddPartitionDesc> partitionDescs, boolean isPartSpecSet, ReplicationSpec replicationSpec, boolean waitOnPrecursor, Table table, URI fromURI, FileSystem fs, Warehouse wh, EximUtil.SemanticAnalyzerWrapperContext x) throws HiveException, URISyntaxException, IOException, MetaException {
    Task dr = null;
    WriteEntity.WriteType lockType = WriteEntity.WriteType.DDL_NO_LOCK;
    if ((table != null) && (isPartitioned(tblDesc) != table.isPartitioned())) {
        // drop and re-create.
        if (replicationSpec.allowReplacementInto(table)) {
            dr = dropTableTask(table, x);
            lockType = WriteEntity.WriteType.DDL_EXCLUSIVE;
            // null it out so we go into the table re-create flow.
            table = null;
        } else {
            // noop out of here.
            return;
        }
    }
    // Normally, on import, trying to create a table or a partition in a db that does not yet exist
    // is an error condition. However, in the case of a REPL LOAD, it is possible that we are trying
    // to create tasks to create a table inside a db that as-of-now does not exist, but there is
    // a precursor Task waiting that will create it before this is encountered. Thus, we instantiate
    // defaults and do not error out in that case.
    Database parentDb = x.getHive().getDatabase(tblDesc.getDatabaseName());
    if (parentDb == null) {
        if (!waitOnPrecursor) {
            throw new SemanticException(ErrorMsg.DATABASE_NOT_EXISTS.getMsg(tblDesc.getDatabaseName()));
        }
    }
    if (tblDesc.getLocation() == null) {
        if (!waitOnPrecursor) {
            tblDesc.setLocation(wh.getTablePath(parentDb, tblDesc.getTableName()).toString());
        } else {
            tblDesc.setLocation(wh.getDnsPath(new Path(wh.getDefaultDatabasePath(tblDesc.getDatabaseName()), MetaStoreUtils.encodeTableName(tblDesc.getTableName().toLowerCase()))).toString());
        }
    }
    if (table == null) {
        if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
            lockType = WriteEntity.WriteType.DDL_SHARED;
        }
        Task t = createTableTask(tblDesc, x);
        table = new Table(tblDesc.getDatabaseName(), tblDesc.getTableName());
        if (!replicationSpec.isMetadataOnly()) {
            if (isPartitioned(tblDesc)) {
                for (AddPartitionDesc addPartitionDesc : partitionDescs) {
                    addPartitionDesc.setReplicationSpec(replicationSpec);
                    t.addDependentTask(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
                }
            } else {
                x.getLOG().debug("adding dependent CopyWork/MoveWork for table");
                t.addDependentTask(loadTable(fromURI, table, true, new Path(tblDesc.getLocation()), replicationSpec, x));
            }
        }
        if (dr == null) {
            // Simply create
            x.getTasks().add(t);
        } else {
            // Drop and recreate
            dr.addDependentTask(t);
            x.getTasks().add(dr);
        }
    } else {
        // Table existed, and is okay to replicate into, not dropping and re-creating.
        if (table.isPartitioned()) {
            x.getLOG().debug("table partitioned");
            for (AddPartitionDesc addPartitionDesc : partitionDescs) {
                addPartitionDesc.setReplicationSpec(replicationSpec);
                Map<String, String> partSpec = addPartitionDesc.getPartition(0).getPartSpec();
                org.apache.hadoop.hive.ql.metadata.Partition ptn = null;
                if ((ptn = x.getHive().getPartition(table, partSpec, false)) == null) {
                    if (!replicationSpec.isMetadataOnly()) {
                        x.getTasks().add(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
                    }
                } else {
                    // The partition already exists; replace it only if the
                    // destination ptn's repl.last.id is older than the replacement's.
                    if (replicationSpec.allowReplacementInto(ptn)) {
                        if (!replicationSpec.isMetadataOnly()) {
                            x.getTasks().add(addSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, x));
                        } else {
                            x.getTasks().add(alterSinglePartition(fromURI, fs, tblDesc, table, wh, addPartitionDesc, replicationSpec, ptn, x));
                        }
                        if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
                            lockType = WriteEntity.WriteType.DDL_SHARED;
                        }
                    } else {
                        // ignore this ptn, do nothing, not an error.
                    }
                }
            }
            if (replicationSpec.isMetadataOnly() && partitionDescs.isEmpty()) {
                // MD-ONLY table alter
                x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec));
                if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
                    lockType = WriteEntity.WriteType.DDL_SHARED;
                }
            }
        } else {
            x.getLOG().debug("table non-partitioned");
            if (!replicationSpec.allowReplacementInto(table)) {
                // silently return, table is newer than our replacement.
                return;
            }
            if (!replicationSpec.isMetadataOnly()) {
                // repl-imports are replace-into unless the event is insert-into
                loadTable(fromURI, table, !replicationSpec.isInsert(), new Path(fromURI), replicationSpec, x);
            } else {
                x.getTasks().add(alterTableTask(tblDesc, x, replicationSpec));
            }
            if (lockType == WriteEntity.WriteType.DDL_NO_LOCK) {
                lockType = WriteEntity.WriteType.DDL_SHARED;
            }
        }
    }
    x.getOutputs().add(new WriteEntity(table, lockType));
}
Also used : Path(org.apache.hadoop.fs.Path) ReplCopyTask(org.apache.hadoop.hive.ql.exec.ReplCopyTask) Task(org.apache.hadoop.hive.ql.exec.Task) Table(org.apache.hadoop.hive.ql.metadata.Table) Database(org.apache.hadoop.hive.metastore.api.Database) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
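
Before scheduling any add or alter tasks, createReplImportTasks stamps every AddPartitionDesc with the ReplicationSpec of the event being applied, so the descriptors carry the replication state (such as repl.last.id) forward to the tasks they produce. A minimal sketch of that tagging step follows; it assumes ReplicationSpec lives in org.apache.hadoop.hive.ql.parse alongside ImportSemanticAnalyzer, and tagWithReplicationSpec is a hypothetical helper name.

import java.util.List;
import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;

public class ReplTagSketch {

    /**
     * Tags every partition descriptor with the replication spec of the current
     * event, as createReplImportTasks does before building add/alter tasks.
     */
    static void tagWithReplicationSpec(List<AddPartitionDesc> descs, ReplicationSpec spec) {
        for (AddPartitionDesc desc : descs) {
            // the spec is what later lets allowReplacementInto compare event state
            desc.setReplicationSpec(spec);
        }
    }
}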

Aggregations

AddPartitionDesc (org.apache.hadoop.hive.ql.plan.AddPartitionDesc): 8 uses
Path (org.apache.hadoop.fs.Path): 6 uses
Table (org.apache.hadoop.hive.ql.metadata.Table): 5 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 3 uses
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 3 uses
IOException (java.io.IOException): 2 uses
URISyntaxException (java.net.URISyntaxException): 2 uses
ArrayList (java.util.ArrayList): 2 uses
Database (org.apache.hadoop.hive.metastore.api.Database): 2 uses
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 2 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2 uses
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException): 2 uses
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 2 uses
BufferedWriter (java.io.BufferedWriter): 1 use
FileNotFoundException (java.io.FileNotFoundException): 1 use
OutputStreamWriter (java.io.OutputStreamWriter): 1 use
URI (java.net.URI): 1 use
SQLException (java.sql.SQLException): 1 use
HashMap (java.util.HashMap): 1 use
LinkedHashMap (java.util.LinkedHashMap): 1 use