Use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache: class Compiler, method setSchema.
/**
 * Get a Schema with fields represented with native Hive types.
 */
private void setSchema(BaseSemanticAnalyzer sem) {
  Schema schema = new Schema();
  // Prefer the analyzer's result schema; otherwise dig a schema out of the fetch task;
  // failing that, give up and return an empty schema.
  if (sem == null) {
    LOG.info("No semantic analyzer, using empty schema.");
  } else if (sem.getResultSchema() != null) {
    List<FieldSchema> lst = sem.getResultSchema();
    schema = new Schema(lst, null);
  } else if (sem.getFetchTask() != null) {
    FetchTask ft = sem.getFetchTask();
    TableDesc td = ft.getTblDesc();
    // Partitioned tables don't have a TableDesc set on the FetchTask; the work holds a list of
    // PartitionDesc objects, each with a table desc. Fetch the desc for the first partition
    // and use its deserializer.
    if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) {
      if (ft.getWork().getPartDesc().size() > 0) {
        td = ft.getWork().getPartDesc().get(0).getTableDesc();
      }
    }
    if (td == null) {
      LOG.info("No returning schema, using empty schema");
    } else {
      String tableName = "result";
      List<FieldSchema> lst = null;
      try {
        lst = HiveMetaStoreUtils.getFieldsFromDeserializer(tableName, td.getDeserializer(driverContext.getConf()));
      } catch (Exception e) {
        LOG.warn("Error getting schema", e);
      }
      if (lst != null) {
        schema = new Schema(lst, null);
      }
    }
  }
  LOG.info("Created Hive schema: " + schema);
  driverContext.setSchema(schema);
}
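The partition fallback in the middle of setSchema is the part that actually exercises FetchTask. As a minimal sketch, not Hive code, the same null checks can be read as a small helper; it only rearranges the calls shown above (getTblDesc, getWork, getPartDesc), and the helper name is made up.

// Hypothetical helper, not in Hive: resolve the TableDesc a FetchTask will read,
// falling back to the first partition's descriptor for partitioned tables.
private static TableDesc tableDescOf(FetchTask ft) {
  TableDesc td = ft.getTblDesc();
  if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null
      && !ft.getWork().getPartDesc().isEmpty()) {
    td = ft.getWork().getPartDesc().get(0).getTableDesc();
  }
  return td;
}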
Use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache: class Executor, method useFetchFromCache.
private void useFetchFromCache(CacheEntry cacheEntry) {
  // Change query FetchTask to use new location specified in results cache.
  FetchTask fetchTaskFromCache = (FetchTask) TaskFactory.get(cacheEntry.getFetchWork());
  fetchTaskFromCache.initialize(driverContext.getQueryState(), driverContext.getPlan(), null, context);
  driverContext.getPlan().setFetchTask(fetchTaskFromCache);
  driverContext.setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.QUERY_USING_CACHE, cacheEntry));
}
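A hedged usage note: this method only applies once the results cache has produced a hit for the compiled query, so a caller would guard it. In the sketch below, lookUpCache and queryText are hypothetical placeholders rather than Hive APIs.

// Hypothetical caller; lookUpCache(...) stands in for whatever cache lookup the driver performs.
CacheEntry cacheEntry = lookUpCache(queryText);
if (cacheEntry != null) {
  // Reuse the cached result files instead of the FetchTask produced during compilation.
  useFetchFromCache(cacheEntry);
}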
Use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache: class GenTezUtils, method processFileSink.
public static void processFileSink(GenTezProcContext context, FileSinkOperator fileSink)
    throws SemanticException {
  ParseContext parseContext = context.parseContext;
  // Is this an INSERT OVERWRITE TABLE?
  boolean isInsertTable = GenMapRedUtils.isInsertInto(parseContext, fileSink);
  HiveConf hconf = parseContext.getConf();
  boolean chDir = GenMapRedUtils.isMergeRequired(context.moveTask, hconf, fileSink,
      context.currentTask, isInsertTable);
  Path finalName = GenMapRedUtils.createMoveTask(context.currentTask, chDir, fileSink,
      parseContext, context.moveTask, hconf, context.dependencyTask);
  if (chDir) {
    // Merge the files in the destination table/partitions by creating a map-only merge job.
    // If the underlying data is RCFile or OrcFile, an RCFileBlockMerge or
    // OrcFileStripeMerge task would be created instead.
    LOG.info("using CombineHiveInputformat for the merge job");
    Utilities.FILE_OP_LOGGER.debug("will generate MR work for merging files from "
        + fileSink.getConf().getDirName() + " to " + finalName);
    GenMapRedUtils.createMRWorkForMergingFiles(fileSink, finalName, context.dependencyTask,
        context.moveTask, hconf, context.currentTask, parseContext.getQueryState().getLineageState());
  }
  FetchTask fetchTask = parseContext.getFetchTask();
  if (fetchTask != null && context.currentTask.getNumChild() == 0) {
    if (fetchTask.isFetchFrom(fileSink.getConf())) {
      context.currentTask.setFetchSource(true);
    }
  }
}
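The closing block is what ties this code path to FetchTask: it marks a leaf task whose file sink feeds the query's fetch. A minimal sketch, not part of Hive, that packages the same check using only the calls appearing above:

// Minimal sketch, not Hive code: mark the current task as the fetch source when it is a leaf
// task and the plan's FetchTask reads from this file sink's output.
private static void markFetchSourceIfNeeded(GenTezProcContext context, FileSinkOperator fileSink) {
  FetchTask fetchTask = context.parseContext.getFetchTask();
  if (fetchTask != null && context.currentTask.getNumChild() == 0
      && fetchTask.isFetchFrom(fileSink.getConf())) {
    context.currentTask.setFetchSource(true);
  }
}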
Use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache: class BaseSemanticAnalyzer, method createFetchTask.
/**
 * Create a FetchTask for a given schema.
 */
protected FetchTask createFetchTask(String tableSchema) {
  String schema = "json".equals(conf.get(HiveConf.ConfVars.HIVE_DDL_OUTPUT_FORMAT.varname, "text"))
      ? "json#string" : tableSchema;
  Properties prop = new Properties();
  // Sets delimiter to tab (ascii 9)
  prop.setProperty(serdeConstants.SERIALIZATION_FORMAT, Integer.toString(Utilities.tabCode));
  prop.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, " ");
  String[] colTypes = schema.split("#");
  prop.setProperty("columns", colTypes[0]);
  prop.setProperty("columns.types", colTypes[1]);
  prop.setProperty(serdeConstants.SERIALIZATION_LIB, LazySimpleSerDe.class.getName());
  prop.setProperty(hive_metastoreConstants.TABLE_BUCKETING_VERSION, "-1");
  FetchWork fetch = new FetchWork(ctx.getResFile(),
      new TableDesc(TextInputFormat.class, IgnoreKeyTextOutputFormat.class, prop), -1);
  fetch.setSerializationNullFormat(" ");
  return (FetchTask) TaskFactory.get(fetch);
}
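The tableSchema argument follows a "names#types" convention: the text before '#' is the comma-separated column list and the text after it is the matching comma-separated type list. An illustrative call (the column names below are made up, not taken from a specific Hive DDL operation):

// Illustrative only: a DDL-style analyzer whose result rows have two string columns.
FetchTask fetch = createFetchTask("col_name,data_type#string,string");
// When HIVE_DDL_OUTPUT_FORMAT is set to json, the schema collapses to a single string column named "json".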
Use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache: class ExplainSemanticAnalyzer, method analyzeInternal.
@Override
public void analyzeInternal(ASTNode ast) throws SemanticException {
  final int childCount = ast.getChildCount();
  // Skip TOK_QUERY.
  int i = 1;
  while (i < childCount) {
    int explainOptions = ast.getChild(i).getType();
    if (explainOptions == HiveParser.KW_FORMATTED) {
      config.setFormatted(true);
    } else if (explainOptions == HiveParser.KW_EXTENDED) {
      config.setExtended(true);
    } else if (explainOptions == HiveParser.KW_DEPENDENCY) {
      config.setDependency(true);
    } else if (explainOptions == HiveParser.KW_CBO) {
      config.setCbo(true);
    } else if (explainOptions == HiveParser.KW_COST) {
      config.setCboCost(true);
    } else if (explainOptions == HiveParser.KW_JOINCOST) {
      config.setCboJoinCost(true);
    } else if (explainOptions == HiveParser.KW_LOGICAL) {
      config.setLogical(true);
    } else if (explainOptions == HiveParser.KW_AUTHORIZATION) {
      config.setAuthorize(true);
    } else if (explainOptions == HiveParser.KW_ANALYZE) {
      config.setAnalyze(AnalyzeState.RUNNING);
      config.setExplainRootPath(ctx.getMRTmpPath());
    } else if (explainOptions == HiveParser.KW_VECTORIZATION) {
      config.setVectorization(true);
      if (i + 1 < childCount) {
        int vectorizationOption = ast.getChild(i + 1).getType();
        // [ONLY]
        if (vectorizationOption == HiveParser.TOK_ONLY) {
          config.setVectorizationOnly(true);
          i++;
          if (i + 1 >= childCount) {
            break;
          }
          vectorizationOption = ast.getChild(i + 1).getType();
        }
        // [SUMMARY|OPERATOR|EXPRESSION|DETAIL]
        if (vectorizationOption == HiveParser.TOK_SUMMARY) {
          config.setVectorizationDetailLevel(VectorizationDetailLevel.SUMMARY);
          i++;
        } else if (vectorizationOption == HiveParser.TOK_OPERATOR) {
          config.setVectorizationDetailLevel(VectorizationDetailLevel.OPERATOR);
          i++;
        } else if (vectorizationOption == HiveParser.TOK_EXPRESSION) {
          config.setVectorizationDetailLevel(VectorizationDetailLevel.EXPRESSION);
          i++;
        } else if (vectorizationOption == HiveParser.TOK_DETAIL) {
          config.setVectorizationDetailLevel(VectorizationDetailLevel.DETAIL);
          i++;
        }
      }
    } else if (explainOptions == HiveParser.KW_LOCKS) {
      config.setLocks(true);
    } else if (explainOptions == HiveParser.KW_AST) {
      config.setAst(true);
    } else if (explainOptions == HiveParser.KW_DEBUG) {
      config.setDebug(true);
    } else if (explainOptions == HiveParser.KW_DDL) {
      config.setDDL(true);
      config.setCbo(true);
      config.setVectorization(true);
    } else {
      // UNDONE: UNKNOWN OPTION?
    }
    i++;
  }
  ctx.setExplainConfig(config);
  ctx.setExplainPlan(true);
  ASTNode input = (ASTNode) ast.getChild(0);
  // Step 1 (AnalyzeState.RUNNING): run the query and collect the runtime #rows.
  // Step 2 (AnalyzeState.ANALYZING): explain the query and report the runtime #rows collected.
  if (config.getAnalyze() == AnalyzeState.RUNNING) {
    String query = ctx.getTokenRewriteStream().toString(input.getTokenStartIndex(), input.getTokenStopIndex());
    LOG.info("Explain analyze (running phase) for query " + query);
    conf.unset(ValidTxnList.VALID_TXNS_KEY);
    conf.unset(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
    Context runCtx = null;
    try {
      runCtx = new Context(conf);
      // runCtx and ctx share the configuration, but not isExplainPlan().
      runCtx.setExplainConfig(config);
      try (Driver driver = new Driver(conf, runCtx, queryState.getLineageState())) {
        driver.run(query);
        while (driver.getResults(new ArrayList<String>())) {
        }
      } catch (CommandProcessorException e) {
        if (e.getCause() instanceof ReCompileException) {
          throw (ReCompileException) e.getCause();
        } else {
          throw new SemanticException(e.getMessage(), e);
        }
      }
      config.setOpIdToRuntimeNumRows(aggregateStats(config.getExplainRootPath()));
    } catch (IOException e1) {
      throw new SemanticException(e1);
    }
    ctx.resetOpContext();
    ctx.resetStream();
    TaskFactory.resetId();
    LOG.info("Explain analyze (analyzing phase) for query " + query);
    config.setAnalyze(AnalyzeState.ANALYZING);
  }
  // Creating a new QueryState unfortunately causes all .q.out files to change - do this in a separate ticket.
  // Sharing QueryState between generating the plan and executing the query seems bad.
  // BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(new QueryState(queryState.getConf()), input);
  BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, input);
  sem.analyze(input, ctx);
  sem.validate();
  inputs = sem.getInputs();
  outputs = sem.getOutputs();
  ctx.setResFile(ctx.getLocalTmpPath());
  List<Task<?>> tasks = sem.getAllRootTasks();
  if (tasks == null) {
    tasks = Collections.emptyList();
  }
  FetchTask fetchTask = sem.getFetchTask();
  if (fetchTask != null) {
    // Initialize the fetch work so that the operator tree will be constructed.
    fetchTask.getWork().initializeForFetch(ctx.getOpContext());
  }
  if (sem instanceof SemanticAnalyzer) {
    pCtx = sem.getParseContext();
  }
  config.setUserLevelExplain(!config.isExtended() && !config.isFormatted() && !config.isDependency()
      && !config.isCbo() && !config.isLogical() && !config.isVectorization() && !config.isAuthorize()
      && ((HiveConf.getBoolVar(ctx.getConf(), HiveConf.ConfVars.HIVE_EXPLAIN_USER)
              && HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez"))
          || (HiveConf.getBoolVar(ctx.getConf(), HiveConf.ConfVars.HIVE_SPARK_EXPLAIN_USER)
              && HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark"))));
  ExplainWork work = new ExplainWork(ctx.getResFile(), pCtx, tasks, fetchTask, input, sem, config,
      ctx.getCboInfo(), ctx.getOptimizedSql(), ctx.getCalcitePlan());
  work.setAppendTaskType(HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES));
  ExplainTask explTask = (ExplainTask) TaskFactory.get(work);
  fieldList = ExplainTask.getResultSchema();
  rootTasks.add(explTask);
}
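For context, the keywords handled in the option loop correspond to EXPLAIN statement variants. The sketch below is illustrative only; it reuses the Driver calls that already appear in this method, and the table name src and the chosen option combinations are made-up examples.

// Illustrative only: issuing a few EXPLAIN variants through the same Driver API used above.
void runExplainVariants(HiveConf conf, Context runCtx, QueryState queryState) throws Exception {
  try (Driver driver = new Driver(conf, runCtx, queryState.getLineageState())) {
    driver.run("EXPLAIN EXTENDED SELECT key, count(*) FROM src GROUP BY key");
    driver.run("EXPLAIN VECTORIZATION ONLY SUMMARY SELECT key FROM src");
    driver.run("EXPLAIN ANALYZE SELECT key FROM src GROUP BY key");
  }
}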