Search in sources:

Example 1 with LoadTableDesc

Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in project hive by apache.

From the class ImportSemanticAnalyzer, method addSinglePartition:

private static Task<?> addSinglePartition(URI fromURI, FileSystem fs, ImportTableDesc tblDesc, Table table, Warehouse wh, AddPartitionDesc addPartitionDesc, ReplicationSpec replicationSpec, EximUtil.SemanticAnalyzerWrapperContext x) throws MetaException, IOException, HiveException {
    AddPartitionDesc.OnePartitionDesc partSpec = addPartitionDesc.getPartition(0);
    if (tblDesc.isExternal() && tblDesc.getLocation() == null) {
        x.getLOG().debug("Importing in-place: adding AddPart for partition " + partSpecToString(partSpec.getPartSpec()));
        // addPartitionDesc already has the right partition location
        Task<?> addPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc), x.getConf());
        return addPartTask;
    } else {
        String srcLocation = partSpec.getLocation();
        fixLocationInPartSpec(fs, tblDesc, table, wh, replicationSpec, partSpec, x);
        x.getLOG().debug("adding dependent CopyWork/AddPart/MoveWork for partition " + partSpecToString(partSpec.getPartSpec()) + " with source location: " + srcLocation);
        Path tgtLocation = new Path(partSpec.getLocation());
        Path tmpPath = x.getCtx().getExternalTmpPath(tgtLocation);
        Task<?> copyTask = ReplCopyTask.getLoadCopyTask(replicationSpec, new Path(srcLocation), tmpPath, x.getConf());
        Task<?> addPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc), x.getConf());
        LoadTableDesc loadTableWork = new LoadTableDesc(tmpPath, Utilities.getTableDesc(table), partSpec.getPartSpec(), true);
        loadTableWork.setInheritTableSpecs(false);
        Task<?> loadPartTask = TaskFactory.get(new MoveWork(x.getInputs(), x.getOutputs(), loadTableWork, null, false), x.getConf());
        copyTask.addDependentTask(loadPartTask);
        addPartTask.addDependentTask(loadPartTask);
        x.getTasks().add(copyTask);
        return addPartTask;
    }
}
Also used: Path (org.apache.hadoop.fs.Path), MoveWork (org.apache.hadoop.hive.ql.plan.MoveWork), LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc), DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork), AddPartitionDesc (org.apache.hadoop.hive.ql.plan.AddPartitionDesc)
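Distilled from the else-branch above, a minimal sketch of the copy-then-load chain. chainPartitionLoad is a hypothetical helper name; it reuses only the planner calls shown in the example (LoadTableDesc, MoveWork, TaskFactory, addDependentTask):

import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.MoveWork;

// Hypothetical helper: chain a copy task into a MoveWork that loads tmpPath
// into one partition of table, mirroring the else-branch of addSinglePartition.
static Task<?> chainPartitionLoad(Task<?> copyTask, Path tmpPath, Table table,
        Map<String, String> partSpec, HiveConf conf) {
    LoadTableDesc loadTableWork =
        new LoadTableDesc(tmpPath, Utilities.getTableDesc(table), partSpec, true /* replace */);
    loadTableWork.setInheritTableSpecs(false); // keep the partition's own storage descriptor
    Task<?> loadPartTask =
        TaskFactory.get(new MoveWork(null, null, loadTableWork, null, false), conf);
    copyTask.addDependentTask(loadPartTask);   // the load runs only after the copy finishes
    return loadPartTask;
}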

Example 2 with LoadTableDesc

Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in project hive by apache.

From the class ImportSemanticAnalyzer, method loadTable:

private static Task<?> loadTable(URI fromURI, Table table, boolean replace, Path tgtPath, ReplicationSpec replicationSpec, EximUtil.SemanticAnalyzerWrapperContext x) {
    Path dataPath = new Path(fromURI.toString(), EximUtil.DATA_PATH_NAME);
    Path tmpPath = x.getCtx().getExternalTmpPath(tgtPath);
    Task<?> copyTask = ReplCopyTask.getLoadCopyTask(replicationSpec, dataPath, tmpPath, x.getConf());
    LoadTableDesc loadTableWork = new LoadTableDesc(tmpPath, Utilities.getTableDesc(table), new TreeMap<String, String>(), replace);
    Task<?> loadTableTask = TaskFactory.get(new MoveWork(x.getInputs(), x.getOutputs(), loadTableWork, null, false), x.getConf());
    copyTask.addDependentTask(loadTableTask);
    x.getTasks().add(copyTask);
    return loadTableTask;
}
Also used: Path (org.apache.hadoop.fs.Path), MoveWork (org.apache.hadoop.hive.ql.plan.MoveWork), LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc)
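The only material difference from Example 1 is the partition spec: an empty map targets the table itself rather than one partition, and the replace flag decides overwrite versus append. A usage sketch of the hypothetical chainPartitionLoad helper from above:

// Whole-table load: an empty TreeMap as the partition spec, exactly as
// loadTable does above. Note chainPartitionLoad hardcodes replace = true,
// whereas loadTable forwards its own `replace` argument.
Task<?> loadTableTask = chainPartitionLoad(
    copyTask, tmpPath, table, new java.util.TreeMap<String, String>(), conf);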

Example 3 with LoadTableDesc

Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in project hive by apache.

From the class IndexUpdater, method generateUpdateTasks:

public List<Task<? extends Serializable>> generateUpdateTasks() throws HiveException {
    hive = Hive.get(this.conf);
    for (LoadTableDesc ltd : loadTableWork) {
        TableDesc td = ltd.getTable();
        Table srcTable = hive.getTable(td.getTableName());
        List<Index> tblIndexes = IndexUtils.getAllIndexes(srcTable, (short) -1);
        Map<String, String> partSpec = ltd.getPartitionSpec();
        if (partSpec == null || partSpec.size() == 0) {
            // unpartitioned table: update the whole index
            doIndexUpdate(tblIndexes);
        } else {
            doIndexUpdate(tblIndexes, partSpec);
        }
    }
    return tasks;
}
Also used: LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc), Table (org.apache.hadoop.hive.ql.metadata.Table), Index (org.apache.hadoop.hive.metastore.api.Index), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)
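The loop above shows the consumer side of LoadTableDesc: each work item records which table and partition a load targets. A minimal sketch, with describeLoad as a hypothetical helper name, using only the getters exercised above:

import java.util.Map;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

// Hypothetical helper mirroring the IndexUpdater loop: a null or empty
// partition spec means the load touches the whole (unpartitioned) table.
static String describeLoad(LoadTableDesc ltd) {
    TableDesc td = ltd.getTable();
    Map<String, String> partSpec = ltd.getPartitionSpec();
    return (partSpec == null || partSpec.isEmpty())
        ? td.getTableName() + " (whole table)"
        : td.getTableName() + " partition " + partSpec;
}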

Example 4 with LoadTableDesc

Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in project hive by apache.

From the class StatsTask, method getPartitionsList:

/**
   * Get the list of partitions whose statistics need to be updated.
   * TODO: we should reuse the Partitions generated at compile time
   * since getting the list of partitions is quite expensive.
   *
   * @return a list of partitions whose statistics need to be updated.
   * @throws HiveException
   */
private List<Partition> getPartitionsList(Hive db) throws HiveException {
    if (work.getLoadFileDesc() != null) {
        // we are in CTAS, so we know there are no partitions
        return null;
    }
    List<Partition> list = new ArrayList<Partition>();
    if (work.getTableSpecs() != null) {
        // ANALYZE command
        TableSpec tblSpec = work.getTableSpecs();
        table = tblSpec.tableHandle;
        if (!table.isPartitioned()) {
            return null;
        }
        // get all partitions that match the partition spec
        List<Partition> partitions = tblSpec.partitions;
        if (partitions != null) {
            list.addAll(partitions);
        }
    } else if (work.getLoadTableDesc() != null) {
        // INSERT OVERWRITE command
        LoadTableDesc tbd = work.getLoadTableDesc();
        table = db.getTable(tbd.getTable().getTableName());
        if (!table.isPartitioned()) {
            return null;
        }
        DynamicPartitionCtx dpCtx = tbd.getDPCtx();
        if (dpCtx != null && dpCtx.getNumDPCols() > 0) {
            // If no dynamic partitions are generated, dpPartSpecs may not be initialized
            if (dpPartSpecs != null) {
                // load the list of DP partitions and return the list of partition specs
                list.addAll(dpPartSpecs);
            }
        } else {
            // static partition
            Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
            list.add(partn);
        }
    }
    return list;
}
Also used: Partition (org.apache.hadoop.hive.ql.metadata.Partition), LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc), TableSpec (org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec), ArrayList (java.util.ArrayList), DynamicPartitionCtx (org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx)
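The branch on getDPCtx() above is how a dynamic-partition load is told apart from a static one. A minimal sketch (hypothetical predicate name, same two getters as above):

import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;

// A LoadTableDesc carries a DynamicPartitionCtx with dynamic-partition
// columns only for dynamic-partition inserts; otherwise its partition
// spec is fully static and names a single partition.
static boolean isDynamicPartitionLoad(LoadTableDesc tbd) {
    DynamicPartitionCtx dpCtx = tbd.getDPCtx();
    return dpCtx != null && dpCtx.getNumDPCols() > 0;
}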

Example 5 with LoadTableDesc

Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in project hive by apache.

From the class DDLSemanticAnalyzer, method analyzeAlterTablePartMergeFiles:

private void analyzeAlterTablePartMergeFiles(ASTNode ast, String tableName, HashMap<String, String> partSpec) throws SemanticException {
    AlterTablePartMergeFilesDesc mergeDesc = new AlterTablePartMergeFilesDesc(tableName, partSpec);
    List<Path> inputDir = new ArrayList<Path>();
    Path oldTblPartLoc = null;
    Path newTblPartLoc = null;
    Table tblObj = null;
    ListBucketingCtx lbCtx = null;
    try {
        tblObj = getTable(tableName);
        // TODO: we should probably block all ACID tables here.
        if (AcidUtils.isInsertOnlyTable(tblObj.getParameters())) {
            throw new SemanticException("Merge is not supported for MM tables");
        }
        mergeDesc.setTableDesc(Utilities.getTableDesc(tblObj));
        List<String> bucketCols = null;
        Class<? extends InputFormat> inputFormatClass = null;
        boolean isArchived = false;
        if (tblObj.isPartitioned()) {
            if (partSpec == null) {
                throw new SemanticException("source table " + tableName + " is partitioned but no partition desc found.");
            } else {
                Partition part = getPartition(tblObj, partSpec, false);
                if (part == null) {
                    throw new SemanticException("source table " + tableName + " is partitioned but partition not found.");
                }
                bucketCols = part.getBucketCols();
                inputFormatClass = part.getInputFormatClass();
                isArchived = ArchiveUtils.isArchived(part);
                Path tabPath = tblObj.getPath();
                Path partPath = part.getDataLocation();
                // if the table is in a different dfs than the partition,
                // replace the partition's dfs with the table's dfs.
                newTblPartLoc = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(), partPath.toUri().getPath());
                oldTblPartLoc = partPath;
                lbCtx = constructListBucketingCtx(part.getSkewedColNames(), part.getSkewedColValues(), part.getSkewedColValueLocationMaps(), part.isStoredAsSubDirectories(), conf);
            }
        } else {
            inputFormatClass = tblObj.getInputFormatClass();
            bucketCols = tblObj.getBucketCols();
            // input and output are the same
            oldTblPartLoc = tblObj.getPath();
            newTblPartLoc = tblObj.getPath();
            lbCtx = constructListBucketingCtx(tblObj.getSkewedColNames(), tblObj.getSkewedColValues(), tblObj.getSkewedColValueLocationMaps(), tblObj.isStoredAsSubDirectories(), conf);
        }
        // only RCFile and ORC input formats are supported; throw for anything else
        if (!(inputFormatClass.equals(RCFileInputFormat.class) || inputFormatClass.equals(OrcInputFormat.class))) {
            throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_FILE_FORMAT.getMsg());
        }
        mergeDesc.setInputFormatClass(inputFormatClass);
        // throw a HiveException if the table/partition is bucketed
        if (bucketCols != null && bucketCols.size() > 0) {
            throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_BUCKETED.getMsg());
        }
        // throw a HiveException if the table/partition is archived
        if (isArchived) {
            throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_PARTITION_ARCHIVED.getMsg());
        }
        // non-native and non-managed tables are not supported: MoveTask requires filenames to be in a specific format, violating which can cause data loss
        if (tblObj.isNonNative()) {
            throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_NON_NATIVE.getMsg());
        }
        if (tblObj.getTableType() != TableType.MANAGED_TABLE) {
            throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_NOT_MANAGED.getMsg());
        }
        // transactional tables are compacted and no longer need to be bucketed, so merge/concatenation is not safe for them
        boolean isAcid = AcidUtils.isTransactionalTable(tblObj);
        if (isAcid) {
            throw new SemanticException(ErrorMsg.CONCATENATE_UNSUPPORTED_TABLE_TRANSACTIONAL.getMsg());
        }
        inputDir.add(oldTblPartLoc);
        mergeDesc.setInputDir(inputDir);
        mergeDesc.setLbCtx(lbCtx);
        addInputsOutputsAlterTable(tableName, partSpec, AlterTableTypes.MERGEFILES);
        DDLWork ddlWork = new DDLWork(getInputs(), getOutputs(), mergeDesc);
        ddlWork.setNeedLock(true);
        Task<? extends Serializable> mergeTask = TaskFactory.get(ddlWork);
        TableDesc tblDesc = Utilities.getTableDesc(tblObj);
        Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc);
        mergeDesc.setOutputDir(queryTmpdir);
        // No need to handle MM tables - unsupported path.
        LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc, partSpec == null ? new HashMap<>() : partSpec);
        ltd.setLbCtx(lbCtx);
        Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false));
        mergeTask.addDependentTask(moveTsk);
        if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
            BasicStatsWork basicStatsWork;
            if (oldTblPartLoc.equals(newTblPartLoc)) {
                // If we're merging to the same location, we can avoid some metastore calls
                TableSpec tableSpec = new TableSpec(db, tableName, partSpec);
                basicStatsWork = new BasicStatsWork(tableSpec);
            } else {
                basicStatsWork = new BasicStatsWork(ltd);
            }
            basicStatsWork.setNoStatsAggregator(true);
            basicStatsWork.setClearAggregatorStats(true);
            StatsWork columnStatsWork = new StatsWork(tblObj, basicStatsWork, conf);
            Task<? extends Serializable> statTask = TaskFactory.get(columnStatsWork);
            moveTsk.addDependentTask(statTask);
        }
        rootTasks.add(mergeTask);
    } catch (Exception e) {
        throw new SemanticException(e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), MoveWork (org.apache.hadoop.hive.ql.plan.MoveWork), Partition (org.apache.hadoop.hive.ql.metadata.Partition), AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition), Table (org.apache.hadoop.hive.ql.metadata.Table), LinkedHashMap (java.util.LinkedHashMap), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), LockException (org.apache.hadoop.hive.ql.lockmgr.LockException), InvocationTargetException (java.lang.reflect.InvocationTargetException), MetaException (org.apache.hadoop.hive.metastore.api.MetaException), URISyntaxException (java.net.URISyntaxException), FileNotFoundException (java.io.FileNotFoundException), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException), LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc), DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork), StatsWork (org.apache.hadoop.hive.ql.plan.StatsWork), BasicStatsWork (org.apache.hadoop.hive.ql.plan.BasicStatsWork), OrcInputFormat (org.apache.hadoop.hive.ql.io.orc.OrcInputFormat), ListBucketingCtx (org.apache.hadoop.hive.ql.plan.ListBucketingCtx), DescTableDesc (org.apache.hadoop.hive.ql.plan.DescTableDesc), AlterTableDesc (org.apache.hadoop.hive.ql.plan.AlterTableDesc), UnlockTableDesc (org.apache.hadoop.hive.ql.plan.UnlockTableDesc), DropTableDesc (org.apache.hadoop.hive.ql.plan.DropTableDesc), ShowCreateTableDesc (org.apache.hadoop.hive.ql.plan.ShowCreateTableDesc), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), LockTableDesc (org.apache.hadoop.hive.ql.plan.LockTableDesc), TruncateTableDesc (org.apache.hadoop.hive.ql.plan.TruncateTableDesc)
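Stripped of the validation, the task graph this method builds is short. A condensed sketch, reusing the names ltd, mergeTask, tblObj, and conf from the method above:

// mergeTask -> moveTsk (MoveWork around the LoadTableDesc) -> optional stats task
Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false));
mergeTask.addDependentTask(moveTsk);
if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
    BasicStatsWork basicStatsWork = new BasicStatsWork(ltd); // stats keyed off the load target
    basicStatsWork.setNoStatsAggregator(true);
    basicStatsWork.setClearAggregatorStats(true);
    moveTsk.addDependentTask(TaskFactory.get(new StatsWork(tblObj, basicStatsWork, conf)));
}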

Aggregations

LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc): 29 usages
MoveWork (org.apache.hadoop.hive.ql.plan.MoveWork): 20 usages
Path (org.apache.hadoop.fs.Path): 17 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 9 usages
LoadFileDesc (org.apache.hadoop.hive.ql.plan.LoadFileDesc): 9 usages
ArrayList (java.util.ArrayList): 8 usages
LockException (org.apache.hadoop.hive.ql.lockmgr.LockException): 8 usages
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 8 usages
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 8 usages
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 7 usages
Table (org.apache.hadoop.hive.ql.metadata.Table): 7 usages
IOException (java.io.IOException): 6 usages
HashMap (java.util.HashMap): 6 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 6 usages
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 5 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 5 usages
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 5 usages
DynamicPartitionCtx (org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx): 5 usages
LoadFileType (org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType): 5 usages
LinkedHashMap (java.util.LinkedHashMap): 4 usages