Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in project hive by apache.
From class LoadSemanticAnalyzer, method analyzeLoad:
private void analyzeLoad(ASTNode ast) throws SemanticException {
  fromTree = ast.getChild(0);
  tableTree = ast.getChild(1);
  boolean inputInfo = false;
  // Check the last node
  ASTNode child = (ASTNode) ast.getChild(ast.getChildCount() - 1);
  if (child.getToken().getType() == HiveParser.TOK_INPUTFORMAT) {
    if (child.getChildCount() != 2) {
      throw new SemanticException("FileFormat should contain both input format and Serde");
    }
    try {
      inputFormatClassName = stripQuotes(child.getChild(0).getText());
      serDeClassName = stripQuotes(child.getChild(1).getText());
      inputInfo = true;
    } catch (Exception e) {
      throw new SemanticException("FileFormat inputFormatClassName or serDeClassName is incorrect");
    }
  }
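  // LOCAL and OVERWRITE each contribute one extra child to the LOAD AST, so
  // the child count (adjusted for the optional INPUTFORMAT node) tells us
  // which of the two keywords were present.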
  if ((!inputInfo && ast.getChildCount() == 4) || (inputInfo && ast.getChildCount() == 5)) {
    isLocal = true;
    isOverWrite = true;
  }
  if ((!inputInfo && ast.getChildCount() == 3) || (inputInfo && ast.getChildCount() == 4)) {
    if (ast.getChild(2).getText().toLowerCase().equals("local")) {
      isLocal = true;
    } else {
      isOverWrite = true;
    }
  }
  // initialize load path
  URI fromURI;
  try {
    String fromPath = stripQuotes(fromTree.getText());
    fromURI = initializeFromURI(fromPath, isLocal);
  } catch (IOException | URISyntaxException e) {
    throw new SemanticException(ASTErrorUtils.getMsg(
        ErrorMsg.INVALID_PATH.getMsg(), fromTree, e.getMessage()), e);
  }
  // initialize destination table/partition
  TableSpec ts = new TableSpec(db, conf, (ASTNode) tableTree);
  if (ts.tableHandle.isView() || ts.tableHandle.isMaterializedView()) {
    throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
  }
  if (ts.tableHandle.isNonNative()) {
    throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
  }
  if (ts.tableHandle.isStoredAsSubDirectories()) {
    throw new SemanticException(ErrorMsg.LOAD_INTO_STORED_AS_DIR.getMsg());
  }
  List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
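  // A partitioned table with no partition spec cannot be served by a plain
  // file move; reparseAndSuperAnalyze rewrites the LOAD as a query instead.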
  if ((parts != null && parts.size() > 0) && (ts.partSpec == null || ts.partSpec.size() == 0)) {
    // launch a tez job
    reparseAndSuperAnalyze(ts.tableHandle, fromURI);
    return;
  }
  List<String> bucketCols = ts.tableHandle.getBucketCols();
  if (bucketCols != null && !bucketCols.isEmpty()) {
    String error = StrictChecks.checkBucketing(conf);
    if (error != null) {
      // launch a tez job
      reparseAndSuperAnalyze(ts.tableHandle, fromURI);
      return;
    }
  }
  // make sure the arguments make sense
  List<FileStatus> files = applyConstraintsAndGetFiles(fromURI, ts.tableHandle);
  if (queryReWritten) {
    return;
  }
  // for managed tables, make sure the file formats match
  if (TableType.MANAGED_TABLE.equals(ts.tableHandle.getTableType())
      && conf.getBoolVar(HiveConf.ConfVars.HIVECHECKFILEFORMAT)) {
    ensureFileFormatsMatch(ts, files, fromURI);
  }
  inputs.add(toReadEntity(new Path(fromURI)));
  // create final load/move work
  boolean preservePartitionSpecs = false;
  Map<String, String> partSpec = ts.getPartSpec();
  if (partSpec == null) {
    partSpec = new LinkedHashMap<String, String>();
    outputs.add(new WriteEntity(ts.tableHandle,
        (isOverWrite ? WriteEntity.WriteType.INSERT_OVERWRITE : WriteEntity.WriteType.INSERT)));
  } else {
    try {
      Partition part = Hive.get().getPartition(ts.tableHandle, partSpec, false);
      if (part != null) {
        if (isOverWrite) {
          outputs.add(new WriteEntity(part, WriteEntity.WriteType.INSERT_OVERWRITE));
        } else {
          outputs.add(new WriteEntity(part, WriteEntity.WriteType.INSERT));
          // If partition already exists and we aren't overwriting it, then respect
          // its current location info rather than picking it from the parent TableDesc
          preservePartitionSpecs = true;
        }
      } else {
        outputs.add(new WriteEntity(ts.tableHandle,
            (isOverWrite ? WriteEntity.WriteType.INSERT_OVERWRITE : WriteEntity.WriteType.INSERT)));
      }
    } catch (HiveException e) {
      throw new SemanticException(e);
    }
  }
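  // For transactional (ACID) tables, a table write id and statement id must
  // be allocated from the transaction manager before the move is planned.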
  Long writeId = null;
  int stmtId = -1;
  boolean isTxnTable = AcidUtils.isTransactionalTable(ts.tableHandle);
  if (isTxnTable) {
    try {
      writeId = getTxnMgr().getTableWriteId(ts.tableHandle.getDbName(), ts.tableHandle.getTableName());
    } catch (LockException ex) {
      throw new SemanticException("Failed to allocate the write id", ex);
    }
    stmtId = getTxnMgr().getStmtIdAndIncrement();
  }
  // Note: this sets LoadFileType incorrectly for ACID; is that relevant for load?
  // See setLoadFileType and setIsAcidIow calls elsewhere for an example.
  LoadTableDesc loadTableWork = new LoadTableDesc(new Path(fromURI),
      Utilities.getTableDesc(ts.tableHandle), partSpec,
      isOverWrite ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING, writeId);
  loadTableWork.setStmtId(stmtId);
  loadTableWork.setInsertOverwrite(isOverWrite);
  if (preservePartitionSpecs) {
    // Note: preservePartitionSpecs=true implies inheritTableSpecs=false, but
    // preservePartitionSpecs=false (the default) is not enough information
    // to set inheritTableSpecs=true.
    loadTableWork.setInheritTableSpecs(false);
  }
  Task<?> childTask = TaskFactory.get(
      new MoveWork(getInputs(), getOutputs(), loadTableWork, null, true, isLocal));
  rootTasks.add(childTask);
  // The user asked for stats to be collected.
  // Some stats like number of rows require a scan of the data.
  // However, some other stats, like number of files, do not require a complete scan.
  // Update the stats which do not require a complete scan.
  Task<?> statTask = null;
  if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
    BasicStatsWork basicStatsWork = new BasicStatsWork(loadTableWork);
    basicStatsWork.setNoStatsAggregator(true);
    basicStatsWork.setClearAggregatorStats(true);
    StatsWork columnStatsWork = new StatsWork(ts.tableHandle, basicStatsWork, conf);
    statTask = TaskFactory.get(columnStatsWork);
  }
  if (statTask != null) {
    childTask.addDependentTask(statTask);
  }
}
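The method boils down to three steps: describe the copy with a LoadTableDesc, wrap it in a MoveWork task, and optionally chain a stats task behind it. Below is a minimal sketch of that wiring for the simplest case (an unpartitioned, non-transactional LOAD ... INTO), reusing only the constructors and setters seen above; sourcePath, tableHandle, inputs, outputs, and rootTasks are hypothetical placeholders, not real Hive state:

// Minimal sketch of the final wiring in analyzeLoad; all free variables
// (sourcePath, tableHandle, inputs, outputs, rootTasks) are placeholders.
Map<String, String> partSpec = new LinkedHashMap<String, String>();
LoadTableDesc desc = new LoadTableDesc(
    sourcePath,                          // where the files to load live
    Utilities.getTableDesc(tableHandle), // serialized destination metadata
    partSpec,                            // empty map: unpartitioned load
    LoadFileType.KEEP_EXISTING,          // plain INTO, i.e. no OVERWRITE
    null);                               // null write id: not an ACID table
desc.setInsertOverwrite(false);
Task<?> moveTask = TaskFactory.get(
    new MoveWork(inputs, outputs, desc, null, true, /* srcLocal */ false));
rootTasks.add(moveTask);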
Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in project hive by apache.
From class TruncateTableAnalyzer, method addMoveTask:
private void addMoveTask(ASTNode root, Table table, Map<String, String> partitionSpec,
    Path oldPartitionLocation, Path newPartitionLocation, ListBucketingCtx lbCtx,
    Path queryTmpdir, Task<?> truncateTask, TableDesc tableDesc) throws SemanticException {
  // Write the output to temporary directory and move it to the final location at the end
  // so the operation is atomic.
  LoadTableDesc loadTableDesc = new LoadTableDesc(queryTmpdir, tableDesc,
      partitionSpec == null ? new HashMap<>() : partitionSpec);
  loadTableDesc.setLbCtx(lbCtx);
  Task<MoveWork> moveTask = TaskFactory.get(new MoveWork(null, null, loadTableDesc, null, false));
  truncateTask.addDependentTask(moveTask);
  addStatTask(root, table, oldPartitionLocation, newPartitionLocation, loadTableDesc, moveTask);
}
Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in project hive by apache.
From class AlterTableConcatenateAnalyzer, method addMoveTask:
private void addMoveTask(TableName tableName, Table table, Map<String, String> partitionSpec,
    Path oldLocation, Path newLocation, ListBucketingCtx lbCtx, TableDesc tableDesc,
    Path queryTmpDir, Task<?> mergeTask) throws SemanticException {
  // No need to handle MM tables - unsupported path.
  LoadTableDesc loadTableDesc = new LoadTableDesc(queryTmpDir, tableDesc,
      partitionSpec == null ? new HashMap<>() : partitionSpec);
  loadTableDesc.setLbCtx(lbCtx);
  loadTableDesc.setInheritTableSpecs(true);
  Task<MoveWork> moveTask = TaskFactory.get(new MoveWork(null, null, loadTableDesc, null, false));
  mergeTask.addDependentTask(moveTask);
  addStatTask(tableName, table, partitionSpec, oldLocation, newLocation, loadTableDesc, moveTask);
}
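Both addMoveTask variants follow the same pattern as the LOAD path above: the destructive work (truncate or merge) writes into queryTmpDir, and only the dependent MoveWork publishes the result, so a failure partway through leaves the existing data untouched. Unlike the LOAD case, the MoveWork here is built with the checkFileFormat flag set to false, presumably because the files being moved were just written by Hive itself rather than supplied by the user.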
Use of org.apache.hadoop.hive.ql.plan.LoadTableDesc in project hive by apache.
From class TestGenMapRedUtilsCreateConditionalTask, method testMergePathValidMoveWorkReturnsNewMoveWork:
@Test
public void testMergePathValidMoveWorkReturnsNewMoveWork() {
  final Path condInputPath = new Path("s3a://bucket/scratch/-ext-10000");
  final Path condOutputPath = new Path("s3a://bucket/scratch/-ext-10002");
  final Path targetMoveWorkPath = new Path("s3a://bucket/scratch/-ext-10003");
  final MoveWork mockWork = mock(MoveWork.class);
  final LineageState lineageState = new LineageState();
  MoveWork newWork;
  // test using loadFileWork
  when(mockWork.getLoadFileWork()).thenReturn(
      new LoadFileDesc(condOutputPath, targetMoveWorkPath, false, "", "", false));
  newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState);
  assertNotNull(newWork);
  assertNotEquals(newWork, mockWork);
  assertEquals(condInputPath, newWork.getLoadFileWork().getSourcePath());
  assertEquals(targetMoveWorkPath, newWork.getLoadFileWork().getTargetDir());
  // test using loadTableWork
  TableDesc tableDesc = new TableDesc();
  reset(mockWork);
  when(mockWork.getLoadTableWork()).thenReturn(new LoadTableDesc(condOutputPath, tableDesc, null));
  newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState);
  assertNotNull(newWork);
  assertNotEquals(newWork, mockWork);
  assertEquals(condInputPath, newWork.getLoadTableWork().getSourcePath());
  assertEquals(tableDesc, newWork.getLoadTableWork().getTable());
}
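In both branches the test checks that mergeMovePaths returns a fresh MoveWork rather than mutating the mock: the source path is re-pointed at condInputPath while the original target (targetMoveWorkPath, or the table described by tableDesc) is preserved. That is exactly the behavior needed when a conditional merge task interposes an extra staging directory between the query output and the final move.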