
Example 81 with HiveConf

Use of org.apache.hadoop.hive.conf.HiveConf in project hive by apache.

The class LocalHiveSparkClient, method execute.

@Override
public SparkJobRef execute(DriverContext driverContext, SparkWork sparkWork) throws Exception {
    Context ctx = driverContext.getCtx();
    HiveConf hiveConf = (HiveConf) ctx.getConf();
    refreshLocalResources(sparkWork, hiveConf);
    JobConf jobConf = new JobConf(hiveConf);
    // Create temporary scratch dir
    Path emptyScratchDir = ctx.getMRTmpPath();
    FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
    fs.mkdirs(emptyScratchDir);
    // Update credential provider location
    // the password to the credential provider is already set in the sparkConf
    // in HiveSparkClientFactory
    HiveConfUtil.updateJobCredentialProviders(jobConf);
    SparkCounters sparkCounters = new SparkCounters(sc);
    Map<String, List<String>> prefixes = sparkWork.getRequiredCounterPrefix();
    if (prefixes != null) {
        for (String group : prefixes.keySet()) {
            for (String counterName : prefixes.get(group)) {
                sparkCounters.createCounter(group, counterName);
            }
        }
    }
    SparkReporter sparkReporter = new SparkReporter(sparkCounters);
    // Generate Spark plan
    SparkPlanGenerator gen = new SparkPlanGenerator(sc, ctx, jobConf, emptyScratchDir, sparkReporter);
    SparkPlan plan = gen.generate(sparkWork);
    if (driverContext.isShutdown()) {
        throw new HiveException("Operation is cancelled.");
    }
    // Execute generated plan.
    JavaPairRDD<HiveKey, BytesWritable> finalRDD = plan.generateGraph();
    // We use Spark's RDD async action to submit the job, as it's currently the only way to get the jobId.
    JavaFutureAction<Void> future = finalRDD.foreachAsync(HiveVoidFunction.getInstance());
    // As we always use a foreach action to submit the RDD graph, it triggers only one job.
    int jobId = future.jobIds().get(0);
    LocalSparkJobStatus sparkJobStatus = new LocalSparkJobStatus(sc, jobId, jobMetricsListener, sparkCounters, plan.getCachedRDDIds(), future);
    return new LocalSparkJobRef(Integer.toString(jobId), hiveConf, sparkJobStatus, sc);
}
Also used: JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), Context (org.apache.hadoop.hive.ql.Context), DriverContext (org.apache.hadoop.hive.ql.DriverContext), Path (org.apache.hadoop.fs.Path), SparkCounters (org.apache.hive.spark.counter.SparkCounters), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), BytesWritable (org.apache.hadoop.io.BytesWritable), LocalSparkJobStatus (org.apache.hadoop.hive.ql.exec.spark.status.impl.LocalSparkJobStatus), HiveKey (org.apache.hadoop.hive.ql.io.HiveKey), FileSystem (org.apache.hadoop.fs.FileSystem), HiveConf (org.apache.hadoop.hive.conf.HiveConf), ArrayList (java.util.ArrayList), List (java.util.List), LocalSparkJobRef (org.apache.hadoop.hive.ql.exec.spark.status.impl.LocalSparkJobRef), JobConf (org.apache.hadoop.mapred.JobConf)
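
The jobId retrieval above leans on Spark's public async-action API rather than anything Hive-specific. The standalone sketch below (hypothetical class name, local master, trivial RDD) illustrates the same foreachAsync / jobIds() pattern outside of Hive; only the Spark API calls are taken from the snippet above.

import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaFutureAction;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class AsyncJobIdSketch {
    public static void main(String[] args) throws Exception {
        // In-process Spark context, analogous to the sc used by LocalHiveSparkClient.
        SparkConf conf = new SparkConf().setAppName("async-jobid-sketch").setMaster("local[2]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
        // foreachAsync submits the job without blocking and returns a JavaFutureAction,
        // which is what exposes the Spark job id.
        JavaFutureAction<Void> future = rdd.foreachAsync(x -> { /* no-op action */ });
        // A single foreach action triggers exactly one job, so jobIds() holds one entry.
        List<Integer> jobIds = future.jobIds();
        System.out.println("Submitted Spark job id: " + jobIds.get(0));
        // Block until completion; Hive instead polls progress through LocalSparkJobStatus.
        future.get();
        sc.stop();
    }
}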

Example 82 with HiveConf

Use of org.apache.hadoop.hive.conf.HiveConf in project hive by apache.

The class RemoteHiveSparkClient, method submit.

private SparkJobRef submit(final DriverContext driverContext, final SparkWork sparkWork) throws Exception {
    final Context ctx = driverContext.getCtx();
    final HiveConf hiveConf = (HiveConf) ctx.getConf();
    refreshLocalResources(sparkWork, hiveConf);
    final JobConf jobConf = new JobConf(hiveConf);
    // Update the credential provider location in the jobConf
    HiveConfUtil.updateJobCredentialProviders(jobConf);
    // Create temporary scratch dir
    final Path emptyScratchDir = ctx.getMRTmpPath();
    FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
    fs.mkdirs(emptyScratchDir);
    byte[] jobConfBytes = KryoSerializer.serializeJobConf(jobConf);
    byte[] scratchDirBytes = KryoSerializer.serialize(emptyScratchDir);
    byte[] sparkWorkBytes = KryoSerializer.serialize(sparkWork);
    JobStatusJob job = new JobStatusJob(jobConfBytes, scratchDirBytes, sparkWorkBytes);
    if (driverContext.isShutdown()) {
        throw new HiveException("Operation is cancelled.");
    }
    JobHandle<Serializable> jobHandle = remoteClient.submit(job);
    RemoteSparkJobStatus sparkJobStatus = new RemoteSparkJobStatus(remoteClient, jobHandle, sparkClientTimtout);
    return new RemoteSparkJobRef(hiveConf, jobHandle, sparkJobStatus);
}
Also used: Context (org.apache.hadoop.hive.ql.Context), DriverContext (org.apache.hadoop.hive.ql.DriverContext), JobContext (org.apache.hive.spark.client.JobContext), Path (org.apache.hadoop.fs.Path), Serializable (java.io.Serializable), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), RemoteSparkJobRef (org.apache.hadoop.hive.ql.exec.spark.status.impl.RemoteSparkJobRef), FileSystem (org.apache.hadoop.fs.FileSystem), RemoteSparkJobStatus (org.apache.hadoop.hive.ql.exec.spark.status.impl.RemoteSparkJobStatus), HiveConf (org.apache.hadoop.hive.conf.HiveConf), JobConf (org.apache.hadoop.mapred.JobConf)
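
Both clients create a temporary scratch directory through Hadoop's FileSystem API before submitting work. A minimal sketch of that pattern, with a hypothetical local path standing in for ctx.getMRTmpPath():

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ScratchDirSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical scratch location; Hive derives the real one from the query Context.
        Path scratchDir = new Path("/tmp/hive-scratch/example-job");
        // Resolve the FileSystem that owns the path, exactly as submit()/execute() do.
        FileSystem fs = scratchDir.getFileSystem(conf);
        fs.mkdirs(scratchDir);
        // Mark the directory for removal when the FileSystem is closed; Hive itself
        // cleans scratch dirs up through its Context rather than deleteOnExit.
        fs.deleteOnExit(scratchDir);
    }
}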

Example 83 with HiveConf

Use of org.apache.hadoop.hive.conf.HiveConf in project hive by apache.

The class ExecDriver, method generateCmdLine.

/**
   * Given a Hive Configuration object - generate a command line fragment for passing such
   * configuration information to ExecDriver.
   */
public static String generateCmdLine(HiveConf hconf, Context ctx) throws IOException {
    HiveConf tempConf = new HiveConf();
    Path hConfFilePath = new Path(ctx.getLocalTmpPath(), JOBCONF_FILENAME);
    OutputStream out = null;
    Properties deltaP = hconf.getChangedProperties();
    boolean hadoopLocalMode = ShimLoader.getHadoopShims().isLocalMode(hconf);
    String hadoopSysDir = "mapred.system.dir";
    String hadoopWorkDir = "mapred.local.dir";
    for (Object one : deltaP.keySet()) {
        String oneProp = (String) one;
        if (hadoopLocalMode && (oneProp.equals(hadoopSysDir) || oneProp.equals(hadoopWorkDir))) {
            continue;
        }
        tempConf.set(oneProp, hconf.get(oneProp));
    }
    // In local mode, append a random suffix to the mapred system/local dirs so that concurrent local-mode jobs do not collide on the same working directories
    if (hadoopLocalMode) {
        tempConf.set(hadoopSysDir, hconf.get(hadoopSysDir) + "/" + Utilities.randGen.nextInt());
        tempConf.set(hadoopWorkDir, hconf.get(hadoopWorkDir) + "/" + Utilities.randGen.nextInt());
    }
    try {
        out = FileSystem.getLocal(hconf).create(hConfFilePath);
        tempConf.writeXml(out);
    } finally {
        if (out != null) {
            out.close();
        }
    }
    return " -jobconffile " + hConfFilePath.toString();
}
Also used: Path (org.apache.hadoop.fs.Path), OutputStream (java.io.OutputStream), HiveConf (org.apache.hadoop.hive.conf.HiveConf), Properties (java.util.Properties)
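
generateCmdLine serializes only the properties that differ from the defaults and hands the child process a "-jobconffile" argument pointing at the XML. A sketch of the consuming side, assuming the child simply loads the file into a Configuration with addResource (the path below is a hypothetical stand-in for hConfFilePath):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class JobConfFileSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Load the XML written by tempConf.writeXml(out) back on top of the defaults.
        conf.addResource(new Path("file:///tmp/hive-local/jobconf.xml"));
        // Only changed properties were written out, so everything else falls back
        // to the normal Hadoop/Hive defaults.
        System.out.println("mapred.system.dir = " + conf.get("mapred.system.dir"));
    }
}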

Example 84 with HiveConf

Use of org.apache.hadoop.hive.conf.HiveConf in project hive by apache.

The class CompactIndexHandler, method generateIndexQuery.

@Override
public void generateIndexQuery(List<Index> indexes, ExprNodeDesc predicate, ParseContext pctx, HiveIndexQueryContext queryContext) {
    Index index = indexes.get(0);
    DecomposedPredicate decomposedPredicate = decomposePredicate(predicate, index, queryContext.getQueryPartitions());
    if (decomposedPredicate == null) {
        queryContext.setQueryTasks(null);
        // abort if we couldn't pull out anything from the predicate
        return;
    }
    // pass residual predicate back out for further processing
    queryContext.setResidualPredicate(decomposedPredicate.residualPredicate);
    // setup TableScanOperator to change input format for original query
    queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName());
    // Build reentrant QL for index query
    StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY ");
    String tmpFile = pctx.getContext().getMRTmpPath().toUri().toString();
    queryContext.setIndexIntermediateFile(tmpFile);
    // The generated QL wraps the file name in double quotes
    qlCommand.append("\"" + tmpFile + "\" ");
    qlCommand.append("SELECT `_bucketname` ,  `_offsets` FROM ");
    qlCommand.append(HiveUtils.unparseIdentifier(index.getIndexTableName()));
    qlCommand.append(" WHERE ");
    String predicateString = decomposedPredicate.pushedPredicate.getExprString();
    qlCommand.append(predicateString);
    // generate tasks from index query string
    LOG.info("Generating tasks for re-entrant QL query: " + qlCommand.toString());
    HiveConf queryConf = new HiveConf(pctx.getConf(), CompactIndexHandler.class);
    HiveConf.setBoolVar(queryConf, HiveConf.ConfVars.COMPRESSRESULT, false);
    Driver driver = new Driver(queryConf);
    driver.compile(qlCommand.toString(), false);
    if (pctx.getConf().getBoolVar(ConfVars.HIVE_INDEX_COMPACT_BINARY_SEARCH) && useSorted) {
        // For now, only works if the predicate is a single condition
        MapWork work = null;
        String originalInputFormat = null;
        for (Task task : driver.getPlan().getRootTasks()) {
            // The reentrant query plan should contain exactly one root MapredWork task; otherwise log the problem and fall back to the default input format
            if (task.getWork() instanceof MapredWork) {
                if (work != null) {
                    LOG.error("Tried to use a binary search on a compact index but there were an " + "unexpected number (>1) of root level map reduce tasks in the " + "reentrant query plan.");
                    work.setInputformat(null);
                    work.setInputFormatSorted(false);
                    break;
                }
                if (task.getWork() != null) {
                    work = ((MapredWork) task.getWork()).getMapWork();
                }
                String inputFormat = work.getInputformat();
                originalInputFormat = inputFormat;
                if (inputFormat == null) {
                    inputFormat = HiveConf.getVar(pctx.getConf(), HiveConf.ConfVars.HIVEINPUTFORMAT);
                }
                // Only use the sorted-index optimization when the input format is HiveInputFormat (or a subclass such as BucketizedHiveInputFormat)
                try {
                    if (!HiveInputFormat.class.isAssignableFrom(JavaUtils.loadClass(inputFormat))) {
                        work = null;
                        break;
                    }
                } catch (ClassNotFoundException e) {
                    LOG.error("Map reduce work's input format class: " + inputFormat + " was not found. " + "Cannot use the fact the compact index is sorted.");
                    work = null;
                    break;
                }
                work.setInputFormatSorted(true);
            }
        }
        if (work != null) {
            // Find the filter operator and expr node which act on the index column and mark them
            if (!findIndexColumnFilter(work.getAliasToWork().values())) {
                LOG.error("Could not locate the index column's filter operator and expr node. Cannot " + "use the fact the compact index is sorted.");
                work.setInputformat(originalInputFormat);
                work.setInputFormatSorted(false);
            }
        }
    }
    queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs());
    queryContext.setQueryTasks(driver.getPlan().getRootTasks());
    return;
}
Also used: DecomposedPredicate (org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler.DecomposedPredicate), HiveInputFormat (org.apache.hadoop.hive.ql.io.HiveInputFormat), Task (org.apache.hadoop.hive.ql.exec.Task), MapWork (org.apache.hadoop.hive.ql.plan.MapWork), MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork), Driver (org.apache.hadoop.hive.ql.Driver), Index (org.apache.hadoop.hive.metastore.api.Index), HiveConf (org.apache.hadoop.hive.conf.HiveConf)
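
For concreteness, with a hypothetical compact index table default__t_idx__compact and a pushed predicate (key = 100), the reentrant command assembled above would look roughly like:

INSERT OVERWRITE DIRECTORY "<mr-tmp-path>" SELECT `_bucketname` ,  `_offsets` FROM `default__t_idx__compact` WHERE (key = 100)

Hive then compiles this query with a dedicated Driver (result compression disabled via COMPRESSRESULT) and passes the resulting root tasks back out through queryContext.setQueryTasks.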

Example 85 with HiveConf

Use of org.apache.hadoop.hive.conf.HiveConf in project hive by apache.

The class CompactIndexHandler, method getIndexBuilderMapRedTask.

@Override
protected Task<?> getIndexBuilderMapRedTask(Set<ReadEntity> inputs, Set<WriteEntity> outputs, List<FieldSchema> indexField, boolean partitioned, PartitionDesc indexTblPartDesc, String indexTableName, PartitionDesc baseTablePartDesc, String baseTableName, String dbName) throws HiveException {
    String indexCols = HiveUtils.getUnparsedColumnNamesFromFieldSchema(indexField);
    // Form a new INSERT OVERWRITE query.
    StringBuilder command = new StringBuilder();
    LinkedHashMap<String, String> partSpec = indexTblPartDesc.getPartSpec();
    command.append("INSERT OVERWRITE TABLE " + HiveUtils.unparseIdentifier(dbName) + "." + HiveUtils.unparseIdentifier(indexTableName));
    if (partitioned && indexTblPartDesc != null) {
        command.append(" PARTITION ( ");
        List<String> ret = getPartKVPairStringArray(partSpec);
        for (int i = 0; i < ret.size(); i++) {
            String partKV = ret.get(i);
            command.append(partKV);
            if (i < ret.size() - 1) {
                command.append(",");
            }
        }
        command.append(" ) ");
    }
    command.append(" SELECT ");
    command.append(indexCols);
    command.append(",");
    command.append(VirtualColumn.FILENAME.getName());
    command.append(",");
    command.append(" collect_set (");
    command.append(VirtualColumn.BLOCKOFFSET.getName());
    command.append(") ");
    command.append(" FROM " + HiveUtils.unparseIdentifier(dbName) + "." + HiveUtils.unparseIdentifier(baseTableName));
    LinkedHashMap<String, String> basePartSpec = baseTablePartDesc.getPartSpec();
    if (basePartSpec != null) {
        command.append(" WHERE ");
        List<String> pkv = getPartKVPairStringArray(basePartSpec);
        for (int i = 0; i < pkv.size(); i++) {
            String partKV = pkv.get(i);
            command.append(partKV);
            if (i < pkv.size() - 1) {
                command.append(" AND ");
            }
        }
    }
    command.append(" GROUP BY ");
    command.append(indexCols + ", " + VirtualColumn.FILENAME.getName());
    HiveConf builderConf = new HiveConf(getConf(), CompactIndexHandler.class);
    builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES, false);
    builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES, false);
    builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGETEZFILES, false);
    Task<?> rootTask = IndexUtils.createRootTask(builderConf, inputs, outputs, command, partSpec, indexTableName, dbName);
    return rootTask;
}
Also used: HiveConf (org.apache.hadoop.hive.conf.HiveConf)
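
For a hypothetical unpartitioned base table default.t with a single indexed column key and index table default__t_idx__compact, and assuming the standard virtual column names INPUT__FILE__NAME and BLOCK__OFFSET__INSIDE__FILE, the builder command assembled above comes out roughly as:

INSERT OVERWRITE TABLE `default`.`default__t_idx__compact` SELECT `key`, INPUT__FILE__NAME, collect_set (BLOCK__OFFSET__INSIDE__FILE) FROM `default`.`t` GROUP BY `key`, INPUT__FILE__NAME

The derived HiveConf disables the map-, mapred-, and Tez-side file merging before IndexUtils.createRootTask turns the command into the root task that is returned.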

Aggregations

HiveConf (org.apache.hadoop.hive.conf.HiveConf): 404 usages
BeforeClass (org.junit.BeforeClass): 73 usages
Test (org.junit.Test): 66 usages
Path (org.apache.hadoop.fs.Path): 54 usages
Before (org.junit.Before): 50 usages
Driver (org.apache.hadoop.hive.ql.Driver): 46 usages
CliSessionState (org.apache.hadoop.hive.cli.CliSessionState): 44 usages
IOException (java.io.IOException): 39 usages
ArrayList (java.util.ArrayList): 37 usages
File (java.io.File): 31 usages
HashMap (java.util.HashMap): 26 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 26 usages
SessionState (org.apache.hadoop.hive.ql.session.SessionState): 22 usages
LinkedHashMap (java.util.LinkedHashMap): 17 usages
List (java.util.List): 16 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 15 usages
MiniHS2 (org.apache.hive.jdbc.miniHS2.MiniHS2): 14 usages
Map (java.util.Map): 12 usages
HiveMetaStoreClient (org.apache.hadoop.hive.metastore.HiveMetaStoreClient): 12 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 12 usages