Use of org.apache.hadoop.hive.ql.plan.MoveWork in project hive by apache.
The class DDLSemanticAnalyzer, method analyzeTruncateTable.
private void analyzeTruncateTable(ASTNode ast) throws SemanticException {
  // TOK_TABLE_PARTITION
  ASTNode root = (ASTNode) ast.getChild(0);
  String tableName = getUnescapedName((ASTNode) root.getChild(0));
  Table table = getTable(tableName, true);
  if (table.getTableType() != TableType.MANAGED_TABLE) {
    throw new SemanticException(ErrorMsg.TRUNCATE_FOR_NON_MANAGED_TABLE.format(tableName));
  }
  if (table.isNonNative()) {
    // TODO
    throw new SemanticException(ErrorMsg.TRUNCATE_FOR_NON_NATIVE_TABLE.format(tableName));
  }
  if (!table.isPartitioned() && root.getChildCount() > 1) {
    throw new SemanticException(ErrorMsg.PARTSPEC_FOR_NON_PARTITIONED_TABLE.format(tableName));
  }
  Map<String, String> partSpec = getPartSpec((ASTNode) root.getChild(1));
  if (partSpec == null) {
    if (!table.isPartitioned()) {
      outputs.add(new WriteEntity(table, WriteEntity.WriteType.DDL_EXCLUSIVE));
    } else {
      for (Partition partition : getPartitions(table, null, false)) {
        outputs.add(new WriteEntity(partition, WriteEntity.WriteType.DDL_EXCLUSIVE));
      }
    }
  } else {
    if (isFullSpec(table, partSpec)) {
      validatePartSpec(table, partSpec, (ASTNode) root.getChild(1), conf, true);
      Partition partition = getPartition(table, partSpec, true);
      outputs.add(new WriteEntity(partition, WriteEntity.WriteType.DDL_EXCLUSIVE));
    } else {
      validatePartSpec(table, partSpec, (ASTNode) root.getChild(1), conf, false);
      for (Partition partition : getPartitions(table, partSpec, false)) {
        outputs.add(new WriteEntity(partition, WriteEntity.WriteType.DDL_EXCLUSIVE));
      }
    }
  }
  TruncateTableDesc truncateTblDesc = new TruncateTableDesc(tableName, partSpec, null);
  DDLWork ddlWork = new DDLWork(getInputs(), getOutputs(), truncateTblDesc);
  Task<? extends Serializable> truncateTask = TaskFactory.get(ddlWork);
  // Is this a truncate column command
  List<String> columnNames = null;
  if (ast.getChildCount() == 2) {
    try {
      columnNames = getColumnNames((ASTNode) ast.getChild(1));
      // It would be possible to support this, but this is such a pointless command.
      if (AcidUtils.isInsertOnlyTable(table.getParameters())) {
        throw new SemanticException("Truncating MM table columns not presently supported");
      }
      List<String> bucketCols = null;
      Class<? extends InputFormat> inputFormatClass = null;
      boolean isArchived = false;
      Path newTblPartLoc = null;
      Path oldTblPartLoc = null;
      List<FieldSchema> cols = null;
      ListBucketingCtx lbCtx = null;
      boolean isListBucketed = false;
      List<String> listBucketColNames = null;
      if (table.isPartitioned()) {
        Partition part = db.getPartition(table, partSpec, false);
        Path tabPath = table.getPath();
        Path partPath = part.getDataLocation();
        // if the table is in a different dfs than the partition,
        // replace the partition's dfs with the table's dfs.
        newTblPartLoc = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(),
            partPath.toUri().getPath());
        oldTblPartLoc = partPath;
        cols = part.getCols();
        bucketCols = part.getBucketCols();
        inputFormatClass = part.getInputFormatClass();
        isArchived = ArchiveUtils.isArchived(part);
        lbCtx = constructListBucketingCtx(part.getSkewedColNames(), part.getSkewedColValues(),
            part.getSkewedColValueLocationMaps(), part.isStoredAsSubDirectories(), conf);
        isListBucketed = part.isStoredAsSubDirectories();
        listBucketColNames = part.getSkewedColNames();
      } else {
        // input and output are the same
        oldTblPartLoc = table.getPath();
        newTblPartLoc = table.getPath();
        cols = table.getCols();
        bucketCols = table.getBucketCols();
        inputFormatClass = table.getInputFormatClass();
        lbCtx = constructListBucketingCtx(table.getSkewedColNames(), table.getSkewedColValues(),
            table.getSkewedColValueLocationMaps(), table.isStoredAsSubDirectories(), conf);
        isListBucketed = table.isStoredAsSubDirectories();
        listBucketColNames = table.getSkewedColNames();
      }
      // throw a HiveException for non-rcfile.
      if (!inputFormatClass.equals(RCFileInputFormat.class)) {
        throw new SemanticException(ErrorMsg.TRUNCATE_COLUMN_NOT_RC.getMsg());
      }
      // throw a HiveException if the table/partition is archived
      if (isArchived) {
        throw new SemanticException(ErrorMsg.TRUNCATE_COLUMN_ARCHIVED.getMsg());
      }
      Set<Integer> columnIndexes = new HashSet<Integer>();
      for (String columnName : columnNames) {
        boolean found = false;
        for (int columnIndex = 0; columnIndex < cols.size(); columnIndex++) {
          if (columnName.equalsIgnoreCase(cols.get(columnIndex).getName())) {
            columnIndexes.add(columnIndex);
            found = true;
            break;
          }
        }
        // Throw an exception if the user is trying to truncate a column which doesn't exist
        if (!found) {
          throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(columnName));
        }
        // Throw an exception if the table/partition is bucketed on one of the columns
        for (String bucketCol : bucketCols) {
          if (bucketCol.equalsIgnoreCase(columnName)) {
            throw new SemanticException(ErrorMsg.TRUNCATE_BUCKETED_COLUMN.getMsg(columnName));
          }
        }
        if (isListBucketed) {
          for (String listBucketCol : listBucketColNames) {
            if (listBucketCol.equalsIgnoreCase(columnName)) {
              throw new SemanticException(ErrorMsg.TRUNCATE_LIST_BUCKETED_COLUMN.getMsg(columnName));
            }
          }
        }
      }
      truncateTblDesc.setColumnIndexes(new ArrayList<Integer>(columnIndexes));
      truncateTblDesc.setInputDir(oldTblPartLoc);
      truncateTblDesc.setLbCtx(lbCtx);
      addInputsOutputsAlterTable(tableName, partSpec, AlterTableTypes.TRUNCATE);
      ddlWork.setNeedLock(true);
      TableDesc tblDesc = Utilities.getTableDesc(table);
      // Write the output to temporary directory and move it to the final location at the end
      // so the operation is atomic.
      Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc);
      truncateTblDesc.setOutputDir(queryTmpdir);
      LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc,
          partSpec == null ? new HashMap<>() : partSpec);
      ltd.setLbCtx(lbCtx);
      @SuppressWarnings("unchecked")
      Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false));
      truncateTask.addDependentTask(moveTsk);
      // Recalculate the HDFS stats if auto gather stats is set
      if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
        BasicStatsWork basicStatsWork;
        if (oldTblPartLoc.equals(newTblPartLoc)) {
          // If we're merging to the same location, we can avoid some metastore calls
          TableSpec tablepart = new TableSpec(this.db, conf, root);
          basicStatsWork = new BasicStatsWork(tablepart);
        } else {
          basicStatsWork = new BasicStatsWork(ltd);
        }
        basicStatsWork.setNoStatsAggregator(true);
        basicStatsWork.setClearAggregatorStats(true);
        StatsWork columnStatsWork = new StatsWork(table, basicStatsWork, conf);
        Task<? extends Serializable> statTask = TaskFactory.get(columnStatsWork);
        moveTsk.addDependentTask(statTask);
      }
    } catch (HiveException e) {
      throw new SemanticException(e);
    }
  }
  rootTasks.add(truncateTask);
}
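The core of this example is the tail end of the method: the truncate task writes its output under a scratch directory obtained from ctx.getExternalTmpPath, and a dependent MoveWork task publishes that directory into the table or partition location so the operation appears atomic. The sketch below restates that write-to-scratch-then-publish idea with plain Hadoop FileSystem calls only; the class name, the paths, and the delete-plus-rename publish step are illustrative assumptions, not Hive's actual MoveTask implementation.

// Illustrative sketch only: the "write to scratch, then move into place" idea behind
// the DDL task + MoveWork pair above, expressed with plain Hadoop FileSystem calls.
// The paths and the delete-plus-rename publish step are assumptions for the sketch.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ScratchThenPublishSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path finalDir = new Path("/warehouse/demo_table/part=1");   // hypothetical partition location
    Path scratchDir = new Path("/tmp/hive-scratch/-ext-10000"); // hypothetical scratch directory
    FileSystem fs = finalDir.getFileSystem(conf);

    // 1. Produce the new contents in the scratch directory (the truncate task's role).
    fs.mkdirs(scratchDir);
    // ... write the truncated files into scratchDir ...

    // 2. Publish: drop the old contents and move the staged directory into place (MoveWork's role).
    fs.delete(finalDir, true);
    fs.rename(scratchDir, finalDir);
  }
}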
Use of org.apache.hadoop.hive.ql.plan.MoveWork in project hive by apache.
The class ImportSemanticAnalyzer, method addSinglePartition.
private static Task<?> addSinglePartition(URI fromURI, FileSystem fs, ImportTableDesc tblDesc,
    Table table, Warehouse wh, AddPartitionDesc addPartitionDesc, ReplicationSpec replicationSpec,
    EximUtil.SemanticAnalyzerWrapperContext x, Long writeId, int stmtId, boolean isSourceMm,
    Task<?> commitTask) throws MetaException, IOException, HiveException {
  AddPartitionDesc.OnePartitionDesc partSpec = addPartitionDesc.getPartition(0);
  if (tblDesc.isExternal() && tblDesc.getLocation() == null) {
    x.getLOG().debug("Importing in-place: adding AddPart for partition "
        + partSpecToString(partSpec.getPartSpec()));
    // addPartitionDesc already has the right partition location
    @SuppressWarnings("unchecked")
    Task<?> addPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc));
    return addPartTask;
  } else {
    String srcLocation = partSpec.getLocation();
    fixLocationInPartSpec(fs, tblDesc, table, wh, replicationSpec, partSpec, x);
    x.getLOG().debug("adding dependent CopyWork/AddPart/MoveWork for partition "
        + partSpecToString(partSpec.getPartSpec()) + " with source location: " + srcLocation);
    Path tgtLocation = new Path(partSpec.getLocation());
    Path destPath = !AcidUtils.isInsertOnlyTable(table.getParameters())
        ? x.getCtx().getExternalTmpPath(tgtLocation)
        : new Path(tgtLocation, AcidUtils.deltaSubdir(writeId, writeId, stmtId));
    Path moveTaskSrc = !AcidUtils.isInsertOnlyTable(table.getParameters()) ? destPath : tgtLocation;
    if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
      Utilities.FILE_OP_LOGGER.trace("adding import work for partition with source location: "
          + srcLocation + "; target: " + tgtLocation + "; copy dest " + destPath + "; mm "
          + writeId + " (src " + isSourceMm + ") for " + partSpecToString(partSpec.getPartSpec()));
    }
    Task<?> copyTask = null;
    if (replicationSpec.isInReplicationScope()) {
      if (isSourceMm || isAcid(writeId)) {
        // Note: this is replication gap, not MM gap... Repl V2 is not ready yet.
        throw new RuntimeException("Replicating MM and ACID tables is not supported");
      }
      copyTask = ReplCopyTask.getLoadCopyTask(replicationSpec, new Path(srcLocation), destPath, x.getConf());
    } else {
      CopyWork cw = new CopyWork(new Path(srcLocation), destPath, false);
      cw.setSkipSourceMmDirs(isSourceMm);
      copyTask = TaskFactory.get(cw);
    }
    Task<?> addPartTask = TaskFactory.get(new DDLWork(x.getInputs(), x.getOutputs(), addPartitionDesc));
    // Note: this sets LoadFileType incorrectly for ACID; is that relevant for import?
    // See setLoadFileType and setIsAcidIow calls elsewhere for an example.
    LoadTableDesc loadTableWork = new LoadTableDesc(moveTaskSrc, Utilities.getTableDesc(table),
        partSpec.getPartSpec(),
        replicationSpec.isReplace() ? LoadFileType.REPLACE_ALL : LoadFileType.OVERWRITE_EXISTING,
        writeId);
    loadTableWork.setStmtId(stmtId);
    loadTableWork.setInheritTableSpecs(false);
    Task<?> loadPartTask = TaskFactory.get(new MoveWork(x.getInputs(), x.getOutputs(), loadTableWork, null, false));
    copyTask.addDependentTask(loadPartTask);
    addPartTask.addDependentTask(loadPartTask);
    x.getTasks().add(copyTask);
    if (commitTask != null) {
      loadPartTask.addDependentTask(commitTask);
    }
    return addPartTask;
  }
}
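addSinglePartition wires a small task DAG: the copy task and the add-partition DDL task both feed the MoveWork ("load partition") task, and an optional commit task runs after the move. The sketch below models only that dependency shape with a stand-in SimpleTask class; this is an assumption made for illustration, not Hive's Task/TaskFactory API, but it makes the execution order easy to see.

// Minimal sketch of the dependency shape built above: a child task runs only
// once all of its parents have finished, so the MoveWork stand-in waits for
// both the copy and the add-partition stand-ins.
import java.util.ArrayList;
import java.util.List;

public class ImportPartitionDagSketch {
  static class SimpleTask {
    final String name;
    final List<SimpleTask> dependents = new ArrayList<>();
    int pendingParents = 0;
    SimpleTask(String name) { this.name = name; }
    void addDependentTask(SimpleTask t) {
      dependents.add(t);
      t.pendingParents++;
    }
    void run() {
      System.out.println("running " + name);
      for (SimpleTask d : dependents) {
        if (--d.pendingParents == 0) {
          d.run(); // a child runs only after all of its parents have completed
        }
      }
    }
  }

  public static void main(String[] args) {
    SimpleTask copyTask = new SimpleTask("CopyWork: export dir -> staging/delta dir");
    SimpleTask addPartTask = new SimpleTask("DDLWork: add partition to metastore");
    SimpleTask loadPartTask = new SimpleTask("MoveWork: move staged files into partition");
    SimpleTask commitTask = new SimpleTask("commit (optional)");

    copyTask.addDependentTask(loadPartTask);
    addPartTask.addDependentTask(loadPartTask);
    loadPartTask.addDependentTask(commitTask);

    copyTask.run();
    addPartTask.run();
  }
}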
Use of org.apache.hadoop.hive.ql.plan.MoveWork in project hive by apache.
The class LoadSemanticAnalyzer, method analyzeInternal.
@Override
public void analyzeInternal(ASTNode ast) throws SemanticException {
  boolean isLocal = false;
  boolean isOverWrite = false;
  Tree fromTree = ast.getChild(0);
  Tree tableTree = ast.getChild(1);
  if (ast.getChildCount() == 4) {
    isLocal = true;
    isOverWrite = true;
  }
  if (ast.getChildCount() == 3) {
    if (ast.getChild(2).getText().toLowerCase().equals("local")) {
      isLocal = true;
    } else {
      isOverWrite = true;
    }
  }
  // initialize load path
  URI fromURI;
  try {
    String fromPath = stripQuotes(fromTree.getText());
    fromURI = initializeFromURI(fromPath, isLocal);
  } catch (IOException e) {
    throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
  } catch (URISyntaxException e) {
    throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
  }
  // initialize destination table/partition
  TableSpec ts = new TableSpec(db, conf, (ASTNode) tableTree);
  if (ts.tableHandle.isView() || ts.tableHandle.isMaterializedView()) {
    throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
  }
  if (ts.tableHandle.isNonNative()) {
    throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
  }
  if (ts.tableHandle.isStoredAsSubDirectories()) {
    throw new SemanticException(ErrorMsg.LOAD_INTO_STORED_AS_DIR.getMsg());
  }
  List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
  if ((parts != null && parts.size() > 0) && (ts.partSpec == null || ts.partSpec.size() == 0)) {
    throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg());
  }
  List<String> bucketCols = ts.tableHandle.getBucketCols();
  if (bucketCols != null && !bucketCols.isEmpty()) {
    String error = StrictChecks.checkBucketing(conf);
    if (error != null) {
      throw new SemanticException("Please load into an intermediate table"
          + " and use 'insert... select' to allow Hive to enforce bucketing. " + error);
    }
  }
  // make sure the arguments make sense
  List<FileStatus> files = applyConstraintsAndGetFiles(fromURI, fromTree, isLocal, ts.tableHandle);
  // for managed tables, make sure the file formats match
  if (TableType.MANAGED_TABLE.equals(ts.tableHandle.getTableType())
      && conf.getBoolVar(HiveConf.ConfVars.HIVECHECKFILEFORMAT)) {
    ensureFileFormatsMatch(ts, files, fromURI);
  }
  inputs.add(toReadEntity(new Path(fromURI)));
  Task<? extends Serializable> rTask = null;
  // create final load/move work
  boolean preservePartitionSpecs = false;
  Map<String, String> partSpec = ts.getPartSpec();
  if (partSpec == null) {
    partSpec = new LinkedHashMap<String, String>();
    outputs.add(new WriteEntity(ts.tableHandle,
        (isOverWrite ? WriteEntity.WriteType.INSERT_OVERWRITE : WriteEntity.WriteType.INSERT)));
  } else {
    try {
      Partition part = Hive.get().getPartition(ts.tableHandle, partSpec, false);
      if (part != null) {
        if (isOverWrite) {
          outputs.add(new WriteEntity(part, WriteEntity.WriteType.INSERT_OVERWRITE));
        } else {
          outputs.add(new WriteEntity(part, WriteEntity.WriteType.INSERT));
          // If partition already exists and we aren't overwriting it, then respect
          // its current location info rather than picking it from the parent TableDesc
          preservePartitionSpecs = true;
        }
      } else {
        outputs.add(new WriteEntity(ts.tableHandle,
            (isOverWrite ? WriteEntity.WriteType.INSERT_OVERWRITE : WriteEntity.WriteType.INSERT)));
      }
    } catch (HiveException e) {
      throw new SemanticException(e);
    }
  }
  Long writeId = null;
  int stmtId = -1;
  if (AcidUtils.isTransactionalTable(ts.tableHandle)) {
    try {
      writeId = SessionState.get().getTxnMgr().getTableWriteId(ts.tableHandle.getDbName(),
          ts.tableHandle.getTableName());
    } catch (LockException ex) {
      throw new SemanticException("Failed to allocate the write id", ex);
    }
    stmtId = SessionState.get().getTxnMgr().getStmtIdAndIncrement();
  }
  // Note: this sets LoadFileType incorrectly for ACID; is that relevant for load?
  // See setLoadFileType and setIsAcidIow calls elsewhere for an example.
  LoadTableDesc loadTableWork = new LoadTableDesc(new Path(fromURI),
      Utilities.getTableDesc(ts.tableHandle), partSpec,
      isOverWrite ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING, writeId);
  loadTableWork.setStmtId(stmtId);
  if (preservePartitionSpecs) {
    // Note: preservePartitionSpecs=true implies inheritTableSpecs=false, but
    // preservePartitionSpecs=false (the default) is not sufficient information
    // to set inheritTableSpecs=true.
    loadTableWork.setInheritTableSpecs(false);
  }
  Task<? extends Serializable> childTask = TaskFactory.get(
      new MoveWork(getInputs(), getOutputs(), loadTableWork, null, true, isLocal));
  if (rTask != null) {
    rTask.addDependentTask(childTask);
  } else {
    rTask = childTask;
  }
  rootTasks.add(rTask);
  // The user asked for stats to be collected.
  // Some stats, like number of rows, require a scan of the data.
  // However, other stats, like number of files, do not require a complete scan.
  // Update the stats which do not require a complete scan.
  Task<? extends Serializable> statTask = null;
  if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
    BasicStatsWork basicStatsWork = new BasicStatsWork(loadTableWork);
    basicStatsWork.setNoStatsAggregator(true);
    basicStatsWork.setClearAggregatorStats(true);
    StatsWork columnStatsWork = new StatsWork(ts.tableHandle, basicStatsWork, conf);
    statTask = TaskFactory.get(columnStatsWork);
  }
  if (statTask != null) {
    childTask.addDependentTask(statTask);
  }
}
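The first few lines of analyzeInternal derive the isLocal and isOverWrite flags from how many optional keywords follow the path and table children of the LOAD statement's AST. The sketch below restates that mapping with a plain list of keyword strings standing in for the extra AST children; the helper name and the assumption that at most two extra keywords appear are illustrative, not Hive's parser API.

// Sketch only: how the optional LOCAL / OVERWRITE keywords map to the two flags
// computed at the top of analyzeInternal. A List<String> stands in for the extra
// AST children; the real code inspects ASTNode child counts and text instead.
import java.util.Arrays;
import java.util.List;

public class LoadFlagsSketch {
  static boolean[] flags(List<String> extraKeywords) {
    boolean isLocal = false;
    boolean isOverWrite = false;
    if (extraKeywords.size() == 2) {          // both keywords present (child count 4 in the AST)
      isLocal = true;
      isOverWrite = true;
    } else if (extraKeywords.size() == 1) {   // exactly one keyword present (child count 3)
      if (extraKeywords.get(0).equalsIgnoreCase("local")) {
        isLocal = true;
      } else {
        isOverWrite = true;
      }
    }
    return new boolean[] { isLocal, isOverWrite };
  }

  public static void main(String[] args) {
    System.out.println(Arrays.toString(flags(Arrays.asList("local"))));              // [true, false]
    System.out.println(Arrays.toString(flags(Arrays.asList("local", "overwrite")))); // [true, true]
  }
}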
Use of org.apache.hadoop.hive.ql.plan.MoveWork in project hive by apache.
The class GenSparkUtils, method createMoveTask.
/**
* Create and add any dependent move tasks.
*
* This is forked from {@link GenMapRedUtils}. The difference is that it doesn't check
* 'isLinkedFileSink' and does not set parent dir for the linked file sinks.
*/
public static Path createMoveTask(Task<? extends Serializable> currTask, boolean chDir,
    FileSinkOperator fsOp, ParseContext parseCtx, List<Task<MoveWork>> mvTasks, HiveConf hconf,
    DependencyCollectionTask dependencyTask) {
  Path dest = null;
  FileSinkDesc fileSinkDesc = fsOp.getConf();
  if (chDir) {
    dest = fsOp.getConf().getFinalDirName();
    // generate the temporary file
    // it must be on the same file system as the current destination
    Context baseCtx = parseCtx.getContext();
    Path tmpDir = baseCtx.getExternalTmpPath(dest);
    // Change all the linked file sink descriptors
    if (fileSinkDesc.getLinkedFileSinkDesc() != null) {
      for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) {
        fsConf.setDirName(tmpDir);
      }
    } else {
      fileSinkDesc.setDirName(tmpDir);
    }
  }
  Task<MoveWork> mvTask = null;
  if (!chDir) {
    mvTask = GenMapRedUtils.findMoveTaskForFsopOutput(mvTasks, fileSinkDesc.getFinalDirName(), false);
  }
  // Set the move task to be dependent on the current task
  if (mvTask != null) {
    GenMapRedUtils.addDependentMoveTasks(mvTask, hconf, currTask, dependencyTask);
  }
  return dest;
}
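When chDir is false, createMoveTask looks up an already-generated move task whose source directory matches the file sink's final directory and chains it behind the current task. The sketch below shows only that lookup-by-path idea with a stand-in MoveSpec class; it is an illustrative assumption, not GenMapRedUtils.findMoveTaskForFsopOutput itself.

// Sketch (not GenMapRedUtils' actual helper): pick the move whose source directory
// is the directory the file sink writes to. MoveSpec is a stand-in for MoveWork.
import java.util.Arrays;
import java.util.List;

public class FindMoveTaskSketch {
  static class MoveSpec {
    final String sourceDir;
    final String targetDir;
    MoveSpec(String sourceDir, String targetDir) {
      this.sourceDir = sourceDir;
      this.targetDir = targetDir;
    }
  }

  static MoveSpec findMoveForSinkOutput(List<MoveSpec> moves, String fileSinkDir) {
    for (MoveSpec m : moves) {
      if (m.sourceDir.equals(fileSinkDir)) {
        return m; // this move consumes the file sink's output, so chain it behind the sink's task
      }
    }
    return null;
  }

  public static void main(String[] args) {
    List<MoveSpec> moves = Arrays.asList(
        new MoveSpec("/tmp/hive/-ext-10001", "/warehouse/t1"),
        new MoveSpec("/tmp/hive/-ext-10002", "/warehouse/t2"));
    MoveSpec match = findMoveForSinkOutput(moves, "/tmp/hive/-ext-10002");
    System.out.println(match == null ? "no move task found" : "move to " + match.targetDir);
  }
}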
Use of org.apache.hadoop.hive.ql.plan.MoveWork in project hive by apache.
The class TestGenMapRedUtilsCreateConditionalTask, method testMovePathsThatCanBeMerged.
@Test
public void testMovePathsThatCanBeMerged() {
  final Path condInputPath = new Path("s3a://bucket/scratch/-ext-10000");
  final Path condOutputPath = new Path("s3a://bucket/scratch/-ext-10002");
  final Path targetMoveWorkPath = new Path("s3a://bucket/scratch/-ext-10003");
  final MoveWork mockWork = mock(MoveWork.class);
  when(mockWork.getLoadFileWork()).thenReturn(
      new LoadFileDesc(condOutputPath, targetMoveWorkPath, false, "", "", false));
  assertTrue("Merging BlobStore paths should be allowed.",
      GenMapRedUtils.shouldMergeMovePaths(hiveConf, condInputPath, condOutputPath, mockWork));
}
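The test stubs the MoveWork so that its load source equals the conditional task's output path and asserts that the two moves can be merged. The sketch below captures one reading of that merge condition: when the intermediate output is exactly the move's source, the hop can collapse into a single move from condInputPath to the final target. This is an assumption for illustration; the real GenMapRedUtils.shouldMergeMovePaths also consults blob-store related settings in HiveConf, which the sketch omits.

// Hedged sketch of the optimization the test exercises: collapse
// condInput -> condOutput -> moveTarget into condInput -> moveTarget
// when the intermediate directory is exactly the move's source.
import org.apache.hadoop.fs.Path;

public class MergeMovePathsSketch {
  static Path[] mergedMoveOrNull(Path condInput, Path condOutput, Path moveSource, Path moveTarget) {
    if (condOutput.equals(moveSource)) {
      return new Path[] { condInput, moveTarget }; // single move: condInput -> moveTarget
    }
    return null; // keep the two-step move
  }

  public static void main(String[] args) {
    Path condInput = new Path("s3a://bucket/scratch/-ext-10000");
    Path condOutput = new Path("s3a://bucket/scratch/-ext-10002");
    Path moveTarget = new Path("s3a://bucket/scratch/-ext-10003");
    Path[] merged = mergedMoveOrNull(condInput, condOutput, condOutput, moveTarget);
    System.out.println(merged == null ? "not mergeable" : merged[0] + " -> " + merged[1]);
  }
}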