Example 26 with LoadTableDesc

Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in the Apache Hive project.

From the class LoadSemanticAnalyzer, method analyzeLoad:

private void analyzeLoad(ASTNode ast) throws SemanticException {
    fromTree = ast.getChild(0);
    tableTree = ast.getChild(1);
    boolean inputInfo = false;
    // Check the last node
    ASTNode child = (ASTNode) ast.getChild(ast.getChildCount() - 1);
    if (child.getToken().getType() == HiveParser.TOK_INPUTFORMAT) {
        if (child.getChildCount() != 2) {
            throw new SemanticException("FileFormat should contain both input format and Serde");
        }
        try {
            inputFormatClassName = stripQuotes(child.getChild(0).getText());
            serDeClassName = stripQuotes(child.getChild(1).getText());
            inputInfo = true;
        } catch (Exception e) {
            throw new SemanticException("FileFormat inputFormatClassName or serDeClassName is incorrect");
        }
    }
    if ((!inputInfo && ast.getChildCount() == 4) || (inputInfo && ast.getChildCount() == 5)) {
        isLocal = true;
        isOverWrite = true;
    }
    if ((!inputInfo && ast.getChildCount() == 3) || (inputInfo && ast.getChildCount() == 4)) {
        if (ast.getChild(2).getText().toLowerCase().equals("local")) {
            isLocal = true;
        } else {
            isOverWrite = true;
        }
    }
    // initialize load path
    URI fromURI;
    try {
        String fromPath = stripQuotes(fromTree.getText());
        fromURI = initializeFromURI(fromPath, isLocal);
    } catch (IOException | URISyntaxException e) {
        throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_PATH.getMsg(), fromTree, e.getMessage()), e);
    }
    // initialize destination table/partition
    TableSpec ts = new TableSpec(db, conf, (ASTNode) tableTree);
    if (ts.tableHandle.isView() || ts.tableHandle.isMaterializedView()) {
        throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
    }
    if (ts.tableHandle.isNonNative()) {
        throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
    }
    if (ts.tableHandle.isStoredAsSubDirectories()) {
        throw new SemanticException(ErrorMsg.LOAD_INTO_STORED_AS_DIR.getMsg());
    }
    List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
    if ((parts != null && parts.size() > 0) && (ts.partSpec == null || ts.partSpec.size() == 0)) {
        // launch a tez job
        reparseAndSuperAnalyze(ts.tableHandle, fromURI);
        return;
    }
    List<String> bucketCols = ts.tableHandle.getBucketCols();
    if (bucketCols != null && !bucketCols.isEmpty()) {
        String error = StrictChecks.checkBucketing(conf);
        if (error != null) {
            // launch a tez job
            reparseAndSuperAnalyze(ts.tableHandle, fromURI);
            return;
        }
    }
    // make sure the arguments make sense
    List<FileStatus> files = applyConstraintsAndGetFiles(fromURI, ts.tableHandle);
    if (queryReWritten) {
        return;
    }
    // for managed tables, make sure the file formats match
    if (TableType.MANAGED_TABLE.equals(ts.tableHandle.getTableType()) && conf.getBoolVar(HiveConf.ConfVars.HIVECHECKFILEFORMAT)) {
        ensureFileFormatsMatch(ts, files, fromURI);
    }
    inputs.add(toReadEntity(new Path(fromURI)));
    // create final load/move work
    boolean preservePartitionSpecs = false;
    Map<String, String> partSpec = ts.getPartSpec();
    if (partSpec == null) {
        partSpec = new LinkedHashMap<String, String>();
        outputs.add(new WriteEntity(ts.tableHandle, (isOverWrite ? WriteEntity.WriteType.INSERT_OVERWRITE : WriteEntity.WriteType.INSERT)));
    } else {
        try {
            Partition part = Hive.get().getPartition(ts.tableHandle, partSpec, false);
            if (part != null) {
                if (isOverWrite) {
                    outputs.add(new WriteEntity(part, WriteEntity.WriteType.INSERT_OVERWRITE));
                } else {
                    outputs.add(new WriteEntity(part, WriteEntity.WriteType.INSERT));
                    // If partition already exists and we aren't overwriting it, then respect
                    // its current location info rather than picking it from the parent TableDesc
                    preservePartitionSpecs = true;
                }
            } else {
                outputs.add(new WriteEntity(ts.tableHandle, (isOverWrite ? WriteEntity.WriteType.INSERT_OVERWRITE : WriteEntity.WriteType.INSERT)));
            }
        } catch (HiveException e) {
            throw new SemanticException(e);
        }
    }
    Long writeId = null;
    int stmtId = -1;
    boolean isTxnTable = AcidUtils.isTransactionalTable(ts.tableHandle);
    if (isTxnTable) {
        try {
            writeId = getTxnMgr().getTableWriteId(ts.tableHandle.getDbName(), ts.tableHandle.getTableName());
        } catch (LockException ex) {
            throw new SemanticException("Failed to allocate the write id", ex);
        }
        stmtId = getTxnMgr().getStmtIdAndIncrement();
    }
    // Note: this sets LoadFileType incorrectly for ACID; is that relevant for load?
    // See setLoadFileType and setIsAcidIow calls elsewhere for an example.
    LoadTableDesc loadTableWork = new LoadTableDesc(new Path(fromURI), Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING, writeId);
    loadTableWork.setStmtId(stmtId);
    loadTableWork.setInsertOverwrite(isOverWrite);
    if (preservePartitionSpecs) {
        // Note: preservePartitionSpecs=true implies inheritTableSpecs=false, but
        // preservePartitionSpecs=false (the default) is not sufficient
        // information to set inheritTableSpecs=true.
        loadTableWork.setInheritTableSpecs(false);
    }
    Task<?> childTask = TaskFactory.get(new MoveWork(getInputs(), getOutputs(), loadTableWork, null, true, isLocal));
    rootTasks.add(childTask);
    // The user asked for stats to be collected.
    // Some stats like number of rows require a scan of the data
    // However, some other stats, like number of files, do not require a complete scan
    // Update the stats which do not require a complete scan.
    Task<?> statTask = null;
    if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
        BasicStatsWork basicStatsWork = new BasicStatsWork(loadTableWork);
        basicStatsWork.setNoStatsAggregator(true);
        basicStatsWork.setClearAggregatorStats(true);
        StatsWork columnStatsWork = new StatsWork(ts.tableHandle, basicStatsWork, conf);
        statTask = TaskFactory.get(columnStatsWork);
    }
    if (statTask != null) {
        childTask.addDependentTask(statTask);
    }
}
Also used : MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) FileStatus(org.apache.hadoop.fs.FileStatus) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) StatsWork(org.apache.hadoop.hive.ql.plan.StatsWork) BasicStatsWork(org.apache.hadoop.hive.ql.plan.BasicStatsWork) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) IOException(java.io.IOException) DecoderException(org.apache.commons.codec.DecoderException) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc)
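
The trickiest part of analyzeLoad is the child-count arithmetic that turns the optional LOCAL and OVERWRITE keywords, plus a trailing INPUTFORMAT clause, into the isLocal, isOverWrite and inputInfo flags. The sketch below restates just that mapping without the Hive parser types; the Flags record and resolveLoadFlags helper are hypothetical names introduced for illustration and are not Hive APIs.

import java.util.List;

public final class LoadFlagsSketch {

    // Hypothetical holder for the three flags analyzeLoad derives from the AST.
    record Flags(boolean isLocal, boolean isOverwrite, boolean hasInputFormat) { }

    // children mirrors ast.getChild(2..n): the tokens that may follow the source
    // path and the target table, e.g. ["local"], ["overwrite"], ["local", "overwrite"],
    // optionally ending with a "TOK_INPUTFORMAT" node.
    static Flags resolveLoadFlags(List<String> children) {
        boolean hasInputFormat = !children.isEmpty()
                && "TOK_INPUTFORMAT".equals(children.get(children.size() - 1));
        // Ignore the trailing INPUTFORMAT node so only the keyword tokens are counted,
        // matching the childCount checks of 3/4 (without the clause) versus 4/5 (with it).
        int keywords = children.size() - (hasInputFormat ? 1 : 0);
        boolean isLocal = false;
        boolean isOverwrite = false;
        if (keywords == 2) {
            // both LOCAL and OVERWRITE were given
            isLocal = true;
            isOverwrite = true;
        } else if (keywords == 1) {
            // exactly one of them was given; the token text decides which
            if ("local".equalsIgnoreCase(children.get(0))) {
                isLocal = true;
            } else {
                isOverwrite = true;
            }
        }
        return new Flags(isLocal, isOverwrite, hasInputFormat);
    }

    public static void main(String[] args) {
        // LOAD DATA LOCAL INPATH ... OVERWRITE INTO TABLE ...
        System.out.println(resolveLoadFlags(List.of("local", "overwrite")));
        // LOAD DATA INPATH ... OVERWRITE INTO TABLE ... INPUTFORMAT ... SERDE ...
        System.out.println(resolveLoadFlags(List.of("overwrite", "TOK_INPUTFORMAT")));
    }
}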

Example 27 with LoadTableDesc

Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in the Apache Hive project.

From the class TruncateTableAnalyzer, method addMoveTask:

private void addMoveTask(ASTNode root, Table table, Map<String, String> partitionSpec, Path oldPartitionLocation, Path newPartitionLocation, ListBucketingCtx lbCtx, Path queryTmpdir, Task<?> truncateTask, TableDesc tableDesc) throws SemanticException {
    // Write the output to temporary directory and move it to the final location at the end
    // so the operation is atomic.
    LoadTableDesc loadTableDesc = new LoadTableDesc(queryTmpdir, tableDesc, partitionSpec == null ? new HashMap<>() : partitionSpec);
    loadTableDesc.setLbCtx(lbCtx);
    Task<MoveWork> moveTask = TaskFactory.get(new MoveWork(null, null, loadTableDesc, null, false));
    truncateTask.addDependentTask(moveTask);
    addStatTask(root, table, oldPartitionLocation, newPartitionLocation, loadTableDesc, moveTask);
}
Also used : MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) HashMap(java.util.HashMap)

Example 28 with LoadTableDesc

Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in the Apache Hive project.

From the class AlterTableConcatenateAnalyzer, method addMoveTask:

private void addMoveTask(TableName tableName, Table table, Map<String, String> partitionSpec, Path oldLocation, Path newLocation, ListBucketingCtx lbCtx, TableDesc tableDesc, Path queryTmpDir, Task<?> mergeTask) throws SemanticException {
    // No need to handle MM tables - unsupported path.
    LoadTableDesc loadTableDesc = new LoadTableDesc(queryTmpDir, tableDesc, partitionSpec == null ? new HashMap<>() : partitionSpec);
    loadTableDesc.setLbCtx(lbCtx);
    loadTableDesc.setInheritTableSpecs(true);
    Task<MoveWork> moveTask = TaskFactory.get(new MoveWork(null, null, loadTableDesc, null, false));
    mergeTask.addDependentTask(moveTask);
    addStatTask(tableName, table, partitionSpec, oldLocation, newLocation, loadTableDesc, moveTask);
}
Also used : MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) HashMap(java.util.HashMap)
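
Examples 27 and 28 follow the same recipe: point a LoadTableDesc at a scratch directory, wrap it in a MoveWork with no inputs or outputs, turn it into a task via TaskFactory, and chain that task after the truncate or merge task so the final move happens only once the preceding work succeeds. The sketch below distills that shared pattern using only the constructors and calls shown above; the helper name addAtomicMoveTask and its parameter list are chosen here for illustration and are not part of Hive, and the same imports as the surrounding examples are assumed.

// Distilled from the two addMoveTask variants above. Illustrative helper, not a Hive API.
private Task<MoveWork> addAtomicMoveTask(Path scratchDir, TableDesc tableDesc,
        Map<String, String> partitionSpec, ListBucketingCtx lbCtx, Task<?> precedingTask) {
    LoadTableDesc loadTableDesc = new LoadTableDesc(scratchDir, tableDesc,
            partitionSpec == null ? new HashMap<>() : partitionSpec);
    loadTableDesc.setLbCtx(lbCtx);
    // The move reads from the scratch directory and only replaces the final
    // table/partition location after precedingTask has completed, keeping the
    // overall operation atomic from a reader's point of view.
    Task<MoveWork> moveTask = TaskFactory.get(new MoveWork(null, null, loadTableDesc, null, false));
    precedingTask.addDependentTask(moveTask);
    return moveTask;
}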

Example 29 with LoadTableDesc

Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in the Apache Hive project.

From the class TestGenMapRedUtilsCreateConditionalTask, method testMergePathValidMoveWorkReturnsNewMoveWork:

@Test
public void testMergePathValidMoveWorkReturnsNewMoveWork() {
    final Path condInputPath = new Path("s3a://bucket/scratch/-ext-10000");
    final Path condOutputPath = new Path("s3a://bucket/scratch/-ext-10002");
    final Path targetMoveWorkPath = new Path("s3a://bucket/scratch/-ext-10003");
    final MoveWork mockWork = mock(MoveWork.class);
    final LineageState lineageState = new LineageState();
    MoveWork newWork;
    // test using loadFileWork
    when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(condOutputPath, targetMoveWorkPath, false, "", "", false));
    newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState);
    assertNotNull(newWork);
    assertNotEquals(newWork, mockWork);
    assertEquals(condInputPath, newWork.getLoadFileWork().getSourcePath());
    assertEquals(targetMoveWorkPath, newWork.getLoadFileWork().getTargetDir());
    // test using loadTableWork
    TableDesc tableDesc = new TableDesc();
    reset(mockWork);
    when(mockWork.getLoadTableWork()).thenReturn(new LoadTableDesc(condOutputPath, tableDesc, null));
    newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState);
    assertNotNull(newWork);
    assertNotEquals(newWork, mockWork);
    assertEquals(condInputPath, newWork.getLoadTableWork().getSourcePath());
    assertTrue(newWork.getLoadTableWork().getTable().equals(tableDesc));
}
Also used : Path(org.apache.hadoop.fs.Path) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) LineageState(org.apache.hadoop.hive.ql.session.LineageState) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) Test(org.junit.Test)
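
The assertions above pin down the observable contract of GenMapRedUtils.mergeMovePaths: it returns a fresh MoveWork whose load work reads from the conditional input path, while the original target directory (for file loads) or table descriptor (for table loads) is carried over. The snippet below is a simplified illustration of that contract, built only from constructors and getters that appear in this test; it is not the real mergeMovePaths implementation, and rewriteSourcePath is a hypothetical name.

// Simplified illustration of the behavior the test asserts; NOT the actual
// GenMapRedUtils.mergeMovePaths code.
static MoveWork rewriteSourcePath(Path newSourcePath, MoveWork original) {
    if (original.getLoadTableWork() != null) {
        // Table load: keep the TableDesc, read from the new source path.
        LoadTableDesc rewritten =
                new LoadTableDesc(newSourcePath, original.getLoadTableWork().getTable(), null);
        return new MoveWork(null, null, rewritten, null, false);
    }
    // File load: keep the target directory, read from the new source path.
    LoadFileDesc old = original.getLoadFileWork();
    LoadFileDesc rewritten =
            new LoadFileDesc(newSourcePath, old.getTargetDir(), false, "", "", false);
    return new MoveWork(null, null, null, rewritten, false);
}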

Aggregations

LoadTableDesc (org.apache.hadoop.hive.ql.plan.LoadTableDesc): 29 usages
MoveWork (org.apache.hadoop.hive.ql.plan.MoveWork): 20 usages
Path (org.apache.hadoop.fs.Path): 17 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 9 usages
LoadFileDesc (org.apache.hadoop.hive.ql.plan.LoadFileDesc): 9 usages
ArrayList (java.util.ArrayList): 8 usages
LockException (org.apache.hadoop.hive.ql.lockmgr.LockException): 8 usages
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 8 usages
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 8 usages
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 7 usages
Table (org.apache.hadoop.hive.ql.metadata.Table): 7 usages
IOException (java.io.IOException): 6 usages
HashMap (java.util.HashMap): 6 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 6 usages
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 5 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 5 usages
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 5 usages
DynamicPartitionCtx (org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx): 5 usages
LoadFileType (org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType): 5 usages
LinkedHashMap (java.util.LinkedHashMap): 4 usages