Use of org.apache.hadoop.hive.ql.plan.MapredLocalWork in project hive by apache.
The class GenMapRedUtils, method setTaskPlan.
/**
 * Set the current task in the mapredWork.
 *
 * @param path
 *          current input path
 * @param alias
 *          current alias
 * @param topOp
 *          the top operator of the stack
 * @param plan
 *          current plan
 * @param local
 *          whether to add the work to the map-reduce plan or to its local work
 * @param tt_desc
 *          table descriptor
 * @throws SemanticException
 */
public static void setTaskPlan(Path path, String alias,
    Operator<? extends OperatorDesc> topOp, MapWork plan, boolean local,
    TableDesc tt_desc) throws SemanticException {
  if (path == null || alias == null) {
    return;
  }
  if (topOp instanceof TableScanOperator) {
    try {
      Utilities.addSchemaEvolutionToTableScanOperator(
          (StructObjectInspector) tt_desc.getDeserializer().getObjectInspector(),
          (TableScanOperator) topOp);
    } catch (Exception e) {
      throw new SemanticException(e);
    }
  }
  if (!local) {
    plan.addPathToAlias(path, alias);
    plan.addPathToPartitionInfo(path, new PartitionDesc(tt_desc, null));
    plan.getAliasToWork().put(alias, topOp);
  } else {
    // populate local work if needed
    MapredLocalWork localPlan = plan.getMapRedLocalWork();
    if (localPlan == null) {
      localPlan = new MapredLocalWork(
          new LinkedHashMap<String, Operator<? extends OperatorDesc>>(),
          new LinkedHashMap<String, FetchWork>());
    }
    assert localPlan.getAliasToWork().get(alias) == null;
    assert localPlan.getAliasToFetchWork().get(alias) == null;
    localPlan.getAliasToWork().put(alias, topOp);
    localPlan.getAliasToFetchWork().put(alias, new FetchWork(new Path(alias), tt_desc));
    plan.setMapRedLocalWork(localPlan);
  }
}
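For orientation, the local branch above does nothing more than populate two parallel, alias-keyed maps inside a MapredLocalWork. A minimal sketch of that structure, using only the constructors the snippet itself calls (the alias "t1", tableScanOp, and tableDesc are hypothetical placeholders):

LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork = new LinkedHashMap<>();
LinkedHashMap<String, FetchWork> aliasToFetchWork = new LinkedHashMap<>();
MapredLocalWork localWork = new MapredLocalWork(aliasToWork, aliasToFetchWork);
// Each alias appears in both maps: the operator tree that consumes the rows,
// and the FetchWork that says where to read them from. "t1", tableScanOp and
// tableDesc below are hypothetical.
localWork.getAliasToWork().put("t1", tableScanOp);
localWork.getAliasToFetchWork().put("t1", new FetchWork(new Path("t1"), tableDesc));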
Use of org.apache.hadoop.hive.ql.plan.MapredLocalWork in project hive by apache.
The class SparkSortMergeJoinFactory, method setupBucketMapJoinInfo.
private static void setupBucketMapJoinInfo(MapWork plan, SMBMapJoinOperator currMapJoinOp) {
  if (currMapJoinOp != null) {
    Map<String, Map<String, List<String>>> aliasBucketFileNameMapping =
        currMapJoinOp.getConf().getAliasBucketFileNameMapping();
    if (aliasBucketFileNameMapping != null) {
      MapredLocalWork localPlan = plan.getMapRedLocalWork();
      if (localPlan == null) {
        localPlan = currMapJoinOp.getConf().getLocalWork();
      } else {
        // local plan is not null, so merge the SMBMapJoinOperator's local work into it
        MapredLocalWork smbLocalWork = currMapJoinOp.getConf().getLocalWork();
        if (smbLocalWork != null) {
          localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork());
          localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork());
        }
      }
      if (localPlan == null) {
        return;
      }
      // hand the merged local work to the SMB join operator rather than the map work
      plan.setMapRedLocalWork(null);
      currMapJoinOp.getConf().setLocalWork(localPlan);
      BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext();
      localPlan.setBucketMapjoinContext(bucketMJCxt);
      bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
      bucketMJCxt.setBucketFileNameMapping(currMapJoinOp.getConf().getBigTableBucketNumMapping());
      localPlan.setInputFileChangeSensitive(true);
      bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias());
      bucketMJCxt.setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
      bucketMJCxt.setBigTablePartSpecToFileMapping(
          currMapJoinOp.getConf().getBigTablePartSpecToFileMapping());
      plan.setUseBucketizedHiveInputFormat(true);
    }
  }
}
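The nested mapping handed to setAliasBucketFileNameMapping above is a Map<String, Map<String, List<String>>>. A hedged sketch of its assumed shape, going by the accessor name: the outer map is keyed by small-table alias and the inner map by big-table bucket file, with the values listing the matching small-table bucket files (the alias and paths below are made up):

Map<String, List<String>> bucketsPerBigFile = new LinkedHashMap<>();
bucketsPerBigFile.put("hdfs:/warehouse/big/000000_0",
    Arrays.asList("hdfs:/warehouse/small/000000_0")); // hypothetical paths
Map<String, Map<String, List<String>>> aliasBucketFileNameMapping = new LinkedHashMap<>();
aliasBucketFileNameMapping.put("small_table", bucketsPerBigFile); // hypothetical alias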
Use of org.apache.hadoop.hive.ql.plan.MapredLocalWork in project hive by apache.
The class HashTableLoader, method loadDirectly.
private void loadDirectly(MapJoinTableContainer[] mapJoinTables, String inputFileName) throws Exception {
  MapredLocalWork localWork = context.getLocalWork();
  List<Operator<?>> directWorks = localWork.getDirectFetchOp().get(joinOp);
  if (directWorks == null || directWorks.isEmpty()) {
    return;
  }
  JobConf job = new JobConf(hconf);
  MapredLocalTask localTask = new MapredLocalTask(localWork, job, false);
  HashTableSinkOperator sink = new TemporaryHashSinkOperator(new CompilationOpContext(), desc);
  sink.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>(directWorks));
  for (Operator<?> operator : directWorks) {
    if (operator != null) {
      operator.setChildOperators(Arrays.<Operator<? extends OperatorDesc>>asList(sink));
    }
  }
  localTask.setExecContext(context);
  localTask.startForward(inputFileName);
  MapJoinTableContainer[] tables = sink.getMapJoinTables();
  for (int i = 0; i < sink.getNumParent(); i++) {
    if (sink.getParentOperators().get(i) != null) {
      mapJoinTables[i] = tables[i];
    }
  }
  Arrays.fill(tables, null);
}
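Note the positional contract in the copy-back loop: parent i of the temporary sink is join input i, so the container built for that input is written back to mapJoinTables at the same index. The closing Arrays.fill(tables, null) then drops the sink's own references, leaving the caller as the owner of the loaded containers.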
Use of org.apache.hadoop.hive.ql.plan.MapredLocalWork in project hive by apache.
The class HashTableLoader, method load.
@Override
public void load(MapJoinTableContainer[] mapJoinTables,
    MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
  String currentInputPath = context.getCurrentInputPath().toString();
  LOG.info("******* Load from HashTable for input file: " + currentInputPath);
  MapredLocalWork localWork = context.getLocalWork();
  try {
    if (localWork.getDirectFetchOp() != null) {
      loadDirectly(mapJoinTables, currentInputPath);
    }
    Path baseDir = getBaseDir(localWork);
    if (baseDir == null) {
      return;
    }
    String fileName = localWork.getBucketFileName(currentInputPath);
    for (int pos = 0; pos < mapJoinTables.length; pos++) {
      if (pos == desc.getPosBigTable() || mapJoinTables[pos] != null) {
        continue;
      }
      Path path = Utilities.generatePath(baseDir, desc.getDumpFilePrefix(), (byte) pos, fileName);
      LOG.info("\tLoad back 1 hashtable file from tmp file uri:" + path);
      ObjectInputStream in = new ObjectInputStream(
          new BufferedInputStream(new FileInputStream(path.toUri().getPath()), 4096));
      try {
        mapJoinTables[pos] = mapJoinTableSerdes[pos].load(in);
      } finally {
        in.close();
      }
    }
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
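The stream handling above predates try-with-resources; the load step can be written more compactly with identical behavior (a sketch using the same variables as load() above):

try (ObjectInputStream in = new ObjectInputStream(
    new BufferedInputStream(new FileInputStream(path.toUri().getPath()), 4096))) {
  // same deserialization as the try/finally form above; the stream is
  // closed automatically even if load() throws
  mapJoinTables[pos] = mapJoinTableSerdes[pos].load(in);
}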
Use of org.apache.hadoop.hive.ql.plan.MapredLocalWork in project hive by apache.
The class ExecDriver, method main.
@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException, HiveException {
  String planFileName = null;
  String jobConfFileName = null;
  boolean noLog = false;
  String files = null;
  String libjars = null;
  boolean localtask = false;
  try {
    for (int i = 0; i < args.length; i++) {
      if (args[i].equals("-plan")) {
        planFileName = args[++i];
      } else if (args[i].equals("-jobconffile")) {
        jobConfFileName = args[++i];
      } else if (args[i].equals("-nolog")) {
        noLog = true;
      } else if (args[i].equals("-files")) {
        files = args[++i];
      } else if (args[i].equals("-libjars")) {
        libjars = args[++i];
      } else if (args[i].equals("-localtask")) {
        localtask = true;
      }
    }
  } catch (IndexOutOfBoundsException e) {
    System.err.println("Missing argument to option");
    printUsage();
  }
  JobConf conf;
  if (localtask) {
    conf = new JobConf(MapredLocalTask.class);
  } else {
    conf = new JobConf(ExecDriver.class);
  }
  if (jobConfFileName != null) {
    conf.addResource(new Path(jobConfFileName));
  }
  // Initialize the resources from the command line
  if (files != null) {
    conf.set("tmpfiles", files);
  }
  if (libjars != null) {
    conf.set("tmpjars", libjars);
  }
  if (UserGroupInformation.isSecurityEnabled()) {
    String hadoopAuthToken = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
    if (hadoopAuthToken != null) {
      conf.set("mapreduce.job.credentials.binary", hadoopAuthToken);
    }
  }
  boolean isSilent = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESESSIONSILENT);
  String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID, "").trim();
  if (queryId.isEmpty()) {
    queryId = "unknown-" + System.currentTimeMillis();
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYID, queryId);
  }
  System.setProperty(HiveConf.ConfVars.HIVEQUERYID.toString(), queryId);
  if (noLog) {
    // If started from main() and noLog is on, we should not output any logs.
    // To turn logging on, set -Dtest.silent=false
    org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getRootLogger();
    NullAppender appender = NullAppender.createNullAppender();
    appender.addToLogger(logger.getName(), Level.ERROR);
    appender.start();
  } else {
    setupChildLog4j(conf);
  }
  Logger LOG = LoggerFactory.getLogger(ExecDriver.class.getName());
  LogHelper console = new LogHelper(LOG, isSilent);
  if (planFileName == null) {
    console.printError("Must specify Plan File Name");
    printUsage();
  }
  // print the location of the log file for the user, so that it's easy to
  // find the reason for local mode execution failures
  for (Appender appender : ((org.apache.logging.log4j.core.Logger) LogManager.getRootLogger())
      .getAppenders().values()) {
    if (appender instanceof FileAppender) {
      console.printInfo("Execution log at: " + ((FileAppender) appender).getFileName());
    } else if (appender instanceof RollingFileAppender) {
      console.printInfo("Execution log at: " + ((RollingFileAppender) appender).getFileName());
    }
  }
  // the plan file should always be in a local directory
  Path p = new Path(planFileName);
  FileSystem fs = FileSystem.getLocal(conf);
  InputStream pathData = fs.open(p);
  // libjars are not added to the classpath of the child process,
  // so we add them here explicitly
  try {
    // see also - code in CliDriver.java
    ClassLoader loader = conf.getClassLoader();
    if (StringUtils.isNotBlank(libjars)) {
      loader = Utilities.addToClassPath(loader, StringUtils.split(libjars, ","));
    }
    conf.setClassLoader(loader);
    // Also set this on the thread context class loader, so new threads will
    // inherit this class loader and propagate it into the Configurations
    // they create.
    Thread.currentThread().setContextClassLoader(loader);
  } catch (Exception e) {
    throw new HiveException(e.getMessage(), e);
  }
  int ret;
  if (localtask) {
    memoryMXBean = ManagementFactory.getMemoryMXBean();
    MapredLocalWork plan = SerializationUtilities.deserializePlan(pathData, MapredLocalWork.class);
    MapredLocalTask ed = new MapredLocalTask(plan, conf, isSilent);
    ret = ed.executeInProcess(new DriverContext());
  } else {
    MapredWork plan = SerializationUtilities.deserializePlan(pathData, MapredWork.class);
    ExecDriver ed = new ExecDriver(plan, conf, isSilent);
    ret = ed.execute(new DriverContext());
  }
  if (ret != 0) {
    System.exit(ret);
  }
}
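Stripped of argument parsing and logging setup, the -localtask branch of main() reduces to a few lines. A minimal sketch, assuming a plan file at a hypothetical local path:

// Sketch of the -localtask path; "/tmp/hive/plan.xml" is a hypothetical plan file.
JobConf conf = new JobConf(MapredLocalTask.class);
FileSystem fs = FileSystem.getLocal(conf);
try (InputStream in = fs.open(new Path("/tmp/hive/plan.xml"))) {
  MapredLocalWork plan = SerializationUtilities.deserializePlan(in, MapredLocalWork.class);
  MapredLocalTask task = new MapredLocalTask(plan, conf, false);
  int ret = task.executeInProcess(new DriverContext());
  if (ret != 0) {
    System.exit(ret);
  }
}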