Example 6 with MapredLocalWork

use of org.apache.hadoop.hive.ql.plan.MapredLocalWork in project hive by apache.

the class GenMapRedUtils method setTaskPlan.

/**
   * Set the task plan for an alias: record the path, partition info and top
   * operator for the alias on the given MapWork, either in its map-reduce
   * work or, if requested, in its local work.
   *
   * @param path
   *          the input path to register for the alias
   * @param alias
   *          current alias
   * @param topOp
   *          the top operator of the stack
   * @param plan
   *          current plan
   * @param local
   *          whether to add the alias to map-reduce work or to local work
   * @param tt_desc
   *          table descriptor
   * @throws SemanticException
   */
public static void setTaskPlan(Path path, String alias, Operator<? extends OperatorDesc> topOp, MapWork plan, boolean local, TableDesc tt_desc) throws SemanticException {
    if (path == null || alias == null) {
        return;
    }
    if (topOp instanceof TableScanOperator) {
        try {
            Utilities.addSchemaEvolutionToTableScanOperator((StructObjectInspector) tt_desc.getDeserializer().getObjectInspector(), (TableScanOperator) topOp);
        } catch (Exception e) {
            throw new SemanticException(e);
        }
    }
    if (!local) {
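        // map-reduce side: register the path -> alias, path -> partition-info,
        // and alias -> operator mappings directly on the MapWork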
        plan.addPathToAlias(path, alias);
        plan.addPathToPartitionInfo(path, new PartitionDesc(tt_desc, null));
        plan.getAliasToWork().put(alias, topOp);
    } else {
        // populate local work if needed
        MapredLocalWork localPlan = plan.getMapRedLocalWork();
        if (localPlan == null) {
            localPlan = new MapredLocalWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(), new LinkedHashMap<String, FetchWork>());
        }
        assert localPlan.getAliasToWork().get(alias) == null;
        assert localPlan.getAliasToFetchWork().get(alias) == null;
        localPlan.getAliasToWork().put(alias, topOp);
        localPlan.getAliasToFetchWork().put(alias, new FetchWork(new Path(alias), tt_desc));
        plan.setMapRedLocalWork(localPlan);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LinkedHashMap(java.util.LinkedHashMap)
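
A minimal usage sketch (not from the Hive source) of how a caller would route one alias into the local work. The table object is a hypothetical stand-in for whatever the surrounding compilation context provides, and in the real compiler this runs against a fully initialized TableScanOperator:

static void wireLocalAlias(org.apache.hadoop.hive.ql.metadata.Table sourceTable) throws SemanticException {
    MapWork mapWork = new MapWork();
    CompilationOpContext ctx = new CompilationOpContext();
    // the TableScanDesc-keyed factory lookup yields a TableScanOperator
    TableScanOperator topOp = (TableScanOperator) OperatorFactory.get(ctx, TableScanDesc.class);
    TableDesc tableDesc = Utilities.getTableDesc(sourceTable);
    GenMapRedUtils.setTaskPlan(new Path("/tmp/hive/dummy"), "a", topOp, mapWork, true, tableDesc);
    // local == true, so the alias lands in the plan's MapredLocalWork
    assert mapWork.getMapRedLocalWork().getAliasToWork().containsKey("a");
    assert mapWork.getMapRedLocalWork().getAliasToFetchWork().containsKey("a");
}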

Example 7 with MapredLocalWork

use of org.apache.hadoop.hive.ql.plan.MapredLocalWork in project hive by apache.

the class SparkSortMergeJoinFactory method setupBucketMapJoinInfo.

private static void setupBucketMapJoinInfo(MapWork plan, SMBMapJoinOperator currMapJoinOp) {
    if (currMapJoinOp != null) {
        Map<String, Map<String, List<String>>> aliasBucketFileNameMapping = currMapJoinOp.getConf().getAliasBucketFileNameMapping();
        if (aliasBucketFileNameMapping != null) {
            MapredLocalWork localPlan = plan.getMapRedLocalWork();
            if (localPlan == null) {
                localPlan = currMapJoinOp.getConf().getLocalWork();
            } else {
                // the map work already has local work; merge the SMB join's
                // own local work into it
                MapredLocalWork smbLocalWork = currMapJoinOp.getConf().getLocalWork();
                if (smbLocalWork != null) {
                    localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork());
                    localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork());
                }
            }
            if (localPlan == null) {
                return;
            }
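            // detach the (possibly merged) local work from the MapWork and
            // attach it to the SMB join operator's descriptor instead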
            plan.setMapRedLocalWork(null);
            currMapJoinOp.getConf().setLocalWork(localPlan);
            BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext();
            localPlan.setBucketMapjoinContext(bucketMJCxt);
            bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
            bucketMJCxt.setBucketFileNameMapping(currMapJoinOp.getConf().getBigTableBucketNumMapping());
            localPlan.setInputFileChangeSensitive(true);
            bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias());
            bucketMJCxt.setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
            bucketMJCxt.setBigTablePartSpecToFileMapping(currMapJoinOp.getConf().getBigTablePartSpecToFileMapping());
            plan.setUseBucketizedHiveInputFormat(true);
        }
    }
}
Also used : BucketMapJoinContext(org.apache.hadoop.hive.ql.plan.BucketMapJoinContext) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) Map(java.util.Map)
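
The net effect is easy to miss in the branching above: the merged MapredLocalWork ends up on the SMB join operator, not on the MapWork. A hedged post-condition sketch (getter names assumed to mirror the setters used above):

static void checkSmbLocalWork(MapWork plan, SMBMapJoinOperator smbJoinOp) {
    // holds after setupBucketMapJoinInfo ran with a non-null alias mapping
    assert plan.getMapRedLocalWork() == null;                // detached from the MapWork
    MapredLocalWork lw = smbJoinOp.getConf().getLocalWork(); // ... attached to the join op
    assert lw.getBucketMapjoinContext() != null;             // bucket matching configured
    assert lw.getInputFileChangeSensitive();                 // reload tables per input bucket file
}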

Example 8 with MapredLocalWork

use of org.apache.hadoop.hive.ql.plan.MapredLocalWork in project hive by apache.

the class HashTableLoader method loadDirectly.

private void loadDirectly(MapJoinTableContainer[] mapJoinTables, String inputFileName) throws Exception {
    MapredLocalWork localWork = context.getLocalWork();
    List<Operator<?>> directWorks = localWork.getDirectFetchOp().get(joinOp);
    if (directWorks == null || directWorks.isEmpty()) {
        return;
    }
    JobConf job = new JobConf(hconf);
    MapredLocalTask localTask = new MapredLocalTask(localWork, job, false);
    HashTableSinkOperator sink = new TemporaryHashSinkOperator(new CompilationOpContext(), desc);
    sink.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>(directWorks));
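    // make the sink the sole child of each direct-fetch operator, so the rows
    // they produce during startForward() flow into the temporary hash sink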
    for (Operator<?> operator : directWorks) {
        if (operator != null) {
            operator.setChildOperators(Arrays.<Operator<? extends OperatorDesc>>asList(sink));
        }
    }
    localTask.setExecContext(context);
    localTask.startForward(inputFileName);
    MapJoinTableContainer[] tables = sink.getMapJoinTables();
    for (int i = 0; i < sink.getNumParent(); i++) {
        if (sink.getParentOperators().get(i) != null) {
            mapJoinTables[i] = tables[i];
        }
    }
    Arrays.fill(tables, null);
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) HashTableSinkOperator(org.apache.hadoop.hive.ql.exec.HashTableSinkOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) TemporaryHashSinkOperator(org.apache.hadoop.hive.ql.exec.TemporaryHashSinkOperator) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) JobConf(org.apache.hadoop.mapred.JobConf) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
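
Stripped of the join-specific wiring, the core pattern here is running local work in-process: build a MapredLocalTask over a MapredLocalWork and drive it to completion with startForward. A minimal sketch of just that pattern, using only the calls shown above (parameter sources are assumed, as in the enclosing class):

static void runLocalWorkInProcess(Configuration hconf, ExecMapperContext context,
        MapredLocalWork localWork, String inputFileName) throws Exception {
    JobConf job = new JobConf(hconf);
    MapredLocalTask task = new MapredLocalTask(localWork, job, false /* isSilent */);
    task.setExecContext(context);      // reuse the enclosing mapper's execution context
    task.startForward(inputFileName);  // drive the fetch operators until input is exhausted
}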

Example 9 with MapredLocalWork

use of org.apache.hadoop.hive.ql.plan.MapredLocalWork in project hive by apache.

the class HashTableLoader method load.

@Override
public void load(MapJoinTableContainer[] mapJoinTables, MapJoinTableContainerSerDe[] mapJoinTableSerdes) throws HiveException {
    String currentInputPath = context.getCurrentInputPath().toString();
    LOG.info("******* Load from HashTable for input file: " + currentInputPath);
    MapredLocalWork localWork = context.getLocalWork();
    try {
        if (localWork.getDirectFetchOp() != null) {
            loadDirectly(mapJoinTables, currentInputPath);
        }
        Path baseDir = getBaseDir(localWork);
        if (baseDir == null) {
            return;
        }
        String fileName = localWork.getBucketFileName(currentInputPath);
        for (int pos = 0; pos < mapJoinTables.length; pos++) {
            if (pos == desc.getPosBigTable() || mapJoinTables[pos] != null) {
                continue;
            }
            Path path = Utilities.generatePath(baseDir, desc.getDumpFilePrefix(), (byte) pos, fileName);
            LOG.info("\tLoad back 1 hashtable file from tmp file uri:" + path);
            try (ObjectInputStream in = new ObjectInputStream(
                    new BufferedInputStream(new FileInputStream(path.toUri().getPath()), 4096))) {
                mapJoinTables[pos] = mapJoinTableSerdes[pos].load(in);
            }
        }
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) BufferedInputStream(java.io.BufferedInputStream) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) FileInputStream(java.io.FileInputStream) ObjectInputStream(java.io.ObjectInputStream)
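
Two skip conditions in the loop deserve a note: the big table's position never has a dumped hash table (it streams through the mapper rather than being materialized), and slots that loadDirectly already populated are left alone. A contrived, self-contained illustration of the rule (Object[] stands in for MapJoinTableContainer[]):

static void logLoadPlan(Object[] mapJoinTables, int posBigTable) {
    // e.g. a 3-way join with the big table at position 1: only positions 0 and 2
    // are candidates, and each is also skipped if something already loaded it
    for (int pos = 0; pos < mapJoinTables.length; pos++) {
        boolean loadFromDisk = pos != posBigTable && mapJoinTables[pos] == null;
        System.out.println("position " + pos + " -> load from disk: " + loadFromDisk);
    }
}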

Example 10 with MapredLocalWork

use of org.apache.hadoop.hive.ql.plan.MapredLocalWork in project hive by apache.

the class ExecDriver method main.

@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException, HiveException {
    String planFileName = null;
    String jobConfFileName = null;
    boolean noLog = false;
    String files = null;
    String libjars = null;
    boolean localtask = false;
    try {
        for (int i = 0; i < args.length; i++) {
            if (args[i].equals("-plan")) {
                planFileName = args[++i];
            } else if (args[i].equals("-jobconffile")) {
                jobConfFileName = args[++i];
            } else if (args[i].equals("-nolog")) {
                noLog = true;
            } else if (args[i].equals("-files")) {
                files = args[++i];
            } else if (args[i].equals("-libjars")) {
                libjars = args[++i];
            } else if (args[i].equals("-localtask")) {
                localtask = true;
            }
        }
    } catch (IndexOutOfBoundsException e) {
        System.err.println("Missing argument to option");
        printUsage();
    }
    JobConf conf;
    if (localtask) {
        conf = new JobConf(MapredLocalTask.class);
    } else {
        conf = new JobConf(ExecDriver.class);
    }
    if (jobConfFileName != null) {
        conf.addResource(new Path(jobConfFileName));
    }
    // Initialize the resources from command line
    if (files != null) {
        conf.set("tmpfiles", files);
    }
    if (libjars != null) {
        conf.set("tmpjars", libjars);
    }
    if (UserGroupInformation.isSecurityEnabled()) {
        String hadoopAuthToken = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
        if (hadoopAuthToken != null) {
            conf.set("mapreduce.job.credentials.binary", hadoopAuthToken);
        }
    }
    boolean isSilent = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESESSIONSILENT);
    String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID, "").trim();
    if (queryId.isEmpty()) {
        queryId = "unknown-" + System.currentTimeMillis();
        HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYID, queryId);
    }
    System.setProperty(HiveConf.ConfVars.HIVEQUERYID.toString(), queryId);
    if (noLog) {
        // If started from main(), and noLog is on, we should not output
        // any logs. To turn the log on, please set -Dtest.silent=false
        org.apache.logging.log4j.Logger logger = org.apache.logging.log4j.LogManager.getRootLogger();
        NullAppender appender = NullAppender.createNullAppender();
        appender.addToLogger(logger.getName(), Level.ERROR);
        appender.start();
    } else {
        setupChildLog4j(conf);
    }
    Logger LOG = LoggerFactory.getLogger(ExecDriver.class.getName());
    LogHelper console = new LogHelper(LOG, isSilent);
    if (planFileName == null) {
        console.printError("Must specify Plan File Name");
        printUsage();
    }
    // print out the location of the log file for the user so that
    // it's easy to find reason for local mode execution failures
    for (Appender appender : ((org.apache.logging.log4j.core.Logger) LogManager.getRootLogger()).getAppenders().values()) {
        if (appender instanceof FileAppender) {
            console.printInfo("Execution log at: " + ((FileAppender) appender).getFileName());
        } else if (appender instanceof RollingFileAppender) {
            console.printInfo("Execution log at: " + ((RollingFileAppender) appender).getFileName());
        }
    }
    // the plan file should always be in local directory
    Path p = new Path(planFileName);
    FileSystem fs = FileSystem.getLocal(conf);
    InputStream pathData = fs.open(p);
    // this is a workaround for HADOOP-17: libjars are not added to the classpath
    // of the child process, so we add it here explicitly
    try {
        // see also - code in CliDriver.java
        ClassLoader loader = conf.getClassLoader();
        if (StringUtils.isNotBlank(libjars)) {
            loader = Utilities.addToClassPath(loader, StringUtils.split(libjars, ","));
        }
        conf.setClassLoader(loader);
        // Also set this to the Thread ContextClassLoader, so new threads will
        // inherit
        // this class loader, and propagate into newly created Configurations by
        // those
        // new threads.
        Thread.currentThread().setContextClassLoader(loader);
    } catch (Exception e) {
        throw new HiveException(e.getMessage(), e);
    }
    int ret;
    if (localtask) {
        memoryMXBean = ManagementFactory.getMemoryMXBean();
        MapredLocalWork plan = SerializationUtilities.deserializePlan(pathData, MapredLocalWork.class);
        MapredLocalTask ed = new MapredLocalTask(plan, conf, isSilent);
        ret = ed.executeInProcess(new DriverContext());
    } else {
        MapredWork plan = SerializationUtilities.deserializePlan(pathData, MapredWork.class);
        ExecDriver ed = new ExecDriver(plan, conf, isSilent);
        ret = ed.execute(new DriverContext());
    }
    if (ret != 0) {
        System.exit(ret);
    }
}
Also used : DriverContext(org.apache.hadoop.hive.ql.DriverContext) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) NullAppender(org.apache.hadoop.hive.ql.log.NullAppender) LogHelper(org.apache.hadoop.hive.ql.session.SessionState.LogHelper) RollingFileAppender(org.apache.logging.log4j.core.appender.RollingFileAppender) Logger(org.slf4j.Logger) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf) Path(org.apache.hadoop.fs.Path) Appender(org.apache.logging.log4j.core.Appender) FileAppender(org.apache.logging.log4j.core.appender.FileAppender) InputStream(java.io.InputStream) LogInitializationException(org.apache.hadoop.hive.common.LogUtils.LogInitializationException) IOException(java.io.IOException) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork)
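
To make the flag handling concrete, a hypothetical in-JVM invocation (all paths are made up; in practice Hive launches this main in a child JVM against a plan it has already serialized):

public class ExecDriverExample {
    public static void main(String[] unused) throws Exception {
        ExecDriver.main(new String[] {
            "-plan", "/tmp/hive-exec/plan.file",      // serialized MapredWork (hypothetical path)
            "-jobconffile", "/tmp/hive-exec/job.xml", // JobConf resource to overlay
            "-libjars", "/tmp/udfs/my-udf.jar",       // added to the classpath and set as tmpjars
            "-nolog"                                  // route log4j output to a NullAppender
        });
    }
}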

Aggregations

MapredLocalWork (org.apache.hadoop.hive.ql.plan.MapredLocalWork): 15 usages
Path (org.apache.hadoop.fs.Path): 12 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 8 usages
Operator (org.apache.hadoop.hive.ql.exec.Operator): 6 usages
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 6 usages
ArrayList (java.util.ArrayList): 5 usages
LinkedHashMap (java.util.LinkedHashMap): 5 usages
Map (java.util.Map): 5 usages
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 5 usages
FetchWork (org.apache.hadoop.hive.ql.plan.FetchWork): 5 usages
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 5 usages
IOException (java.io.IOException): 4 usages
HashMap (java.util.HashMap): 4 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 4 usages
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 4 usages
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 4 usages
BucketMapJoinContext (org.apache.hadoop.hive.ql.plan.BucketMapJoinContext): 4 usages
DriverContext (org.apache.hadoop.hive.ql.DriverContext): 3 usages
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 3 usages
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 3 usages