Use of org.apache.hadoop.hive.conf.HiveConf in project hive by apache.
The class LocalHiveSparkClient, method execute.
@Override
public SparkJobRef execute(DriverContext driverContext, SparkWork sparkWork) throws Exception {
  Context ctx = driverContext.getCtx();
  HiveConf hiveConf = (HiveConf) ctx.getConf();
  refreshLocalResources(sparkWork, hiveConf);
  JobConf jobConf = new JobConf(hiveConf);
  // Create temporary scratch dir
  Path emptyScratchDir = ctx.getMRTmpPath();
  FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
  fs.mkdirs(emptyScratchDir);
  // Update credential provider location
  // the password to the credential provider is already set in the sparkConf
  // in HiveSparkClientFactory
  HiveConfUtil.updateJobCredentialProviders(jobConf);
  SparkCounters sparkCounters = new SparkCounters(sc);
  Map<String, List<String>> prefixes = sparkWork.getRequiredCounterPrefix();
  if (prefixes != null) {
    for (String group : prefixes.keySet()) {
      for (String counterName : prefixes.get(group)) {
        sparkCounters.createCounter(group, counterName);
      }
    }
  }
  SparkReporter sparkReporter = new SparkReporter(sparkCounters);
  // Generate Spark plan
  SparkPlanGenerator gen = new SparkPlanGenerator(sc, ctx, jobConf, emptyScratchDir, sparkReporter);
  SparkPlan plan = gen.generate(sparkWork);
  if (driverContext.isShutdown()) {
    throw new HiveException("Operation is cancelled.");
  }
  // Execute generated plan.
  JavaPairRDD<HiveKey, BytesWritable> finalRDD = plan.generateGraph();
  // We use Spark's RDD async action to submit the job, as it's currently the only way to get the jobId.
  JavaFutureAction<Void> future = finalRDD.foreachAsync(HiveVoidFunction.getInstance());
  // As we always use a foreach action to submit the RDD graph, it triggers only one job.
  int jobId = future.jobIds().get(0);
  LocalSparkJobStatus sparkJobStatus =
      new LocalSparkJobStatus(sc, jobId, jobMetricsListener, sparkCounters, plan.getCachedRDDIds(), future);
  return new LocalSparkJobRef(Integer.toString(jobId), hiveConf, sparkJobStatus, sc);
}
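The method above shows the common pattern of pulling the session HiveConf out of the Context, copying it into a JobConf, and using that JobConf to resolve a FileSystem for the scratch directory. Below is a minimal, self-contained sketch of just that handoff; the scratch path and the standalone main method are illustrative assumptions, not part of the Hive code.

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.mapred.JobConf;

public class ScratchDirSketch {
  public static void main(String[] args) throws Exception {
    HiveConf hiveConf = new HiveConf();                 // session-level Hive settings
    JobConf jobConf = new JobConf(hiveConf);            // copy them into a Hadoop JobConf
    Path scratchDir = new Path("/tmp/hive-scratch");    // hypothetical scratch location
    FileSystem fs = scratchDir.getFileSystem(jobConf);  // resolve the FS from the job config
    fs.mkdirs(scratchDir);                              // create the temporary scratch dir
  }
}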
Use of org.apache.hadoop.hive.conf.HiveConf in project hive by apache.
The class RemoteHiveSparkClient, method submit.
private SparkJobRef submit(final DriverContext driverContext, final SparkWork sparkWork) throws Exception {
  final Context ctx = driverContext.getCtx();
  final HiveConf hiveConf = (HiveConf) ctx.getConf();
  refreshLocalResources(sparkWork, hiveConf);
  final JobConf jobConf = new JobConf(hiveConf);
  // Update the credential provider location in the jobConf
  HiveConfUtil.updateJobCredentialProviders(jobConf);
  // Create temporary scratch dir
  final Path emptyScratchDir = ctx.getMRTmpPath();
  FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
  fs.mkdirs(emptyScratchDir);
  byte[] jobConfBytes = KryoSerializer.serializeJobConf(jobConf);
  byte[] scratchDirBytes = KryoSerializer.serialize(emptyScratchDir);
  byte[] sparkWorkBytes = KryoSerializer.serialize(sparkWork);
  JobStatusJob job = new JobStatusJob(jobConfBytes, scratchDirBytes, sparkWorkBytes);
  if (driverContext.isShutdown()) {
    throw new HiveException("Operation is cancelled.");
  }
  JobHandle<Serializable> jobHandle = remoteClient.submit(job);
  RemoteSparkJobStatus sparkJobStatus = new RemoteSparkJobStatus(remoteClient, jobHandle, sparkClientTimtout);
  return new RemoteSparkJobRef(hiveConf, jobHandle, sparkJobStatus);
}
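Both clients perform the same credential-provider rewrite on the JobConf before handing the configuration to Spark. A minimal sketch of that step, using only the calls visible above (the standalone main method is an illustrative assumption):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConfUtil;
import org.apache.hadoop.mapred.JobConf;

public class CredentialProviderSketch {
  public static void main(String[] args) {
    // Copy the session HiveConf settings into a Hadoop JobConf
    JobConf jobConf = new JobConf(new HiveConf());
    // Rewrite the credential provider location for the job configuration,
    // as LocalHiveSparkClient and RemoteHiveSparkClient do before submission
    HiveConfUtil.updateJobCredentialProviders(jobConf);
  }
}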
Use of org.apache.hadoop.hive.conf.HiveConf in project hive by apache.
The class ExecDriver, method generateCmdLine.
/**
 * Given a Hive configuration object, generate a command-line fragment for passing that
 * configuration to ExecDriver.
 */
public static String generateCmdLine(HiveConf hconf, Context ctx) throws IOException {
  HiveConf tempConf = new HiveConf();
  Path hConfFilePath = new Path(ctx.getLocalTmpPath(), JOBCONF_FILENAME);
  OutputStream out = null;
  Properties deltaP = hconf.getChangedProperties();
  boolean hadoopLocalMode = ShimLoader.getHadoopShims().isLocalMode(hconf);
  String hadoopSysDir = "mapred.system.dir";
  String hadoopWorkDir = "mapred.local.dir";
  for (Object one : deltaP.keySet()) {
    String oneProp = (String) one;
    if (hadoopLocalMode && (oneProp.equals(hadoopSysDir) || oneProp.equals(hadoopWorkDir))) {
      continue;
    }
    tempConf.set(oneProp, hconf.get(oneProp));
  }
  // Workaround: in local mode, point the MapReduce system/local dirs at per-run temp dirs
  if (hadoopLocalMode) {
    tempConf.set(hadoopSysDir, hconf.get(hadoopSysDir) + "/" + Utilities.randGen.nextInt());
    tempConf.set(hadoopWorkDir, hconf.get(hadoopWorkDir) + "/" + Utilities.randGen.nextInt());
  }
  try {
    out = FileSystem.getLocal(hconf).create(hConfFilePath);
    tempConf.writeXml(out);
  } finally {
    if (out != null) {
      out.close();
    }
  }
  return " -jobconffile " + hConfFilePath.toString();
}
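The key HiveConf idioms here are getChangedProperties(), which yields only the settings that differ from the defaults, and Configuration.writeXml(), which persists them so a child ExecDriver process can reload them via -jobconffile. A minimal sketch of that write path follows; the local file name and the overridden property are assumptions chosen for illustration.

import java.io.OutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;

public class JobConfFileSketch {
  public static void main(String[] args) throws Exception {
    HiveConf conf = new HiveConf();
    conf.set("hive.exec.parallel", "true");          // an overridden session setting
    Path confFile = new Path("/tmp/jobconf.xml");    // hypothetical local path
    OutputStream out = FileSystem.getLocal(conf).create(confFile);
    try {
      conf.writeXml(out);                            // persist the configuration as XML
    } finally {
      out.close();
    }
    // The fragment handed to the child ExecDriver process
    System.out.println(" -jobconffile " + confFile);
  }
}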
Use of org.apache.hadoop.hive.conf.HiveConf in project hive by apache.
The class CompactIndexHandler, method generateIndexQuery.
@Override
public void generateIndexQuery(List<Index> indexes, ExprNodeDesc predicate, ParseContext pctx, HiveIndexQueryContext queryContext) {
  Index index = indexes.get(0);
  DecomposedPredicate decomposedPredicate = decomposePredicate(predicate, index, queryContext.getQueryPartitions());
  if (decomposedPredicate == null) {
    queryContext.setQueryTasks(null);
    // abort if we couldn't pull out anything from the predicate
    return;
  }
  // pass residual predicate back out for further processing
  queryContext.setResidualPredicate(decomposedPredicate.residualPredicate);
  // setup TableScanOperator to change input format for original query
  queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName());
  // Build reentrant QL for index query
  StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY ");
  String tmpFile = pctx.getContext().getMRTmpPath().toUri().toString();
  queryContext.setIndexIntermediateFile(tmpFile);
  // QL requires quotes (") around the file name
  qlCommand.append("\"" + tmpFile + "\" ");
  qlCommand.append("SELECT `_bucketname` , `_offsets` FROM ");
  qlCommand.append(HiveUtils.unparseIdentifier(index.getIndexTableName()));
  qlCommand.append(" WHERE ");
  String predicateString = decomposedPredicate.pushedPredicate.getExprString();
  qlCommand.append(predicateString);
  // generate tasks from index query string
  LOG.info("Generating tasks for re-entrant QL query: " + qlCommand.toString());
  HiveConf queryConf = new HiveConf(pctx.getConf(), CompactIndexHandler.class);
  HiveConf.setBoolVar(queryConf, HiveConf.ConfVars.COMPRESSRESULT, false);
  Driver driver = new Driver(queryConf);
  driver.compile(qlCommand.toString(), false);
  if (pctx.getConf().getBoolVar(ConfVars.HIVE_INDEX_COMPACT_BINARY_SEARCH) && useSorted) {
    // For now, this only works if the predicate is a single condition
    MapWork work = null;
    String originalInputFormat = null;
    for (Task task : driver.getPlan().getRootTasks()) {
      // The index query should have one and only one map reduce task in the root tasks
      // Otherwise something is wrong, log the problem and continue using the default format
      if (task.getWork() instanceof MapredWork) {
        if (work != null) {
          LOG.error("Tried to use a binary search on a compact index but there were an "
              + "unexpected number (>1) of root level map reduce tasks in the "
              + "reentrant query plan.");
          work.setInputformat(null);
          work.setInputFormatSorted(false);
          break;
        }
        if (task.getWork() != null) {
          work = ((MapredWork) task.getWork()).getMapWork();
        }
        String inputFormat = work.getInputformat();
        originalInputFormat = inputFormat;
        if (inputFormat == null) {
          inputFormat = HiveConf.getVar(pctx.getConf(), HiveConf.ConfVars.HIVEINPUTFORMAT);
        }
        // The binary search only works with HiveInputFormat and its subclasses such as CombineHiveInputFormat
        // and BucketizedHiveInputFormat
        try {
          if (!HiveInputFormat.class.isAssignableFrom(JavaUtils.loadClass(inputFormat))) {
            work = null;
            break;
          }
        } catch (ClassNotFoundException e) {
          LOG.error("Map reduce work's input format class: " + inputFormat + " was not found. "
              + "Cannot use the fact the compact index is sorted.");
          work = null;
          break;
        }
        work.setInputFormatSorted(true);
      }
    }
    if (work != null) {
      // Find the filter operator and expr node which act on the index column and mark them
      if (!findIndexColumnFilter(work.getAliasToWork().values())) {
        LOG.error("Could not locate the index column's filter operator and expr node. Cannot "
            + "use the fact the compact index is sorted.");
        work.setInputformat(originalInputFormat);
        work.setInputFormatSorted(false);
      }
    }
  }
  queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs());
  queryContext.setQueryTasks(driver.getPlan().getRootTasks());
  return;
}
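The reentrant index query runs under its own HiveConf copy so that the override (disabling result compression) does not leak into the user's session configuration. A small, standalone sketch of that clone-and-override pattern, using only the HiveConf calls visible above:

import org.apache.hadoop.hive.conf.HiveConf;

public class QueryConfSketch {
  public static void main(String[] args) {
    HiveConf sessionConf = new HiveConf();
    // Copy the session settings into a fresh conf attributed to this handler class
    HiveConf queryConf = new HiveConf(sessionConf, QueryConfSketch.class);
    // Disable output compression only for the reentrant index query
    HiveConf.setBoolVar(queryConf, HiveConf.ConfVars.COMPRESSRESULT, false);
    // The session conf is left untouched
    System.out.println(sessionConf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT));
    System.out.println(queryConf.getBoolVar(HiveConf.ConfVars.COMPRESSRESULT)); // false
  }
}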
Use of org.apache.hadoop.hive.conf.HiveConf in project hive by apache.
The class CompactIndexHandler, method getIndexBuilderMapRedTask.
@Override
protected Task<?> getIndexBuilderMapRedTask(Set<ReadEntity> inputs, Set<WriteEntity> outputs, List<FieldSchema> indexField, boolean partitioned, PartitionDesc indexTblPartDesc, String indexTableName, PartitionDesc baseTablePartDesc, String baseTableName, String dbName) throws HiveException {
  String indexCols = HiveUtils.getUnparsedColumnNamesFromFieldSchema(indexField);
  // Form a new INSERT OVERWRITE query.
  StringBuilder command = new StringBuilder();
  LinkedHashMap<String, String> partSpec = indexTblPartDesc.getPartSpec();
  command.append("INSERT OVERWRITE TABLE " + HiveUtils.unparseIdentifier(dbName) + "." + HiveUtils.unparseIdentifier(indexTableName));
  if (partitioned && indexTblPartDesc != null) {
    command.append(" PARTITION ( ");
    List<String> ret = getPartKVPairStringArray(partSpec);
    for (int i = 0; i < ret.size(); i++) {
      String partKV = ret.get(i);
      command.append(partKV);
      if (i < ret.size() - 1) {
        command.append(",");
      }
    }
    command.append(" ) ");
  }
  command.append(" SELECT ");
  command.append(indexCols);
  command.append(",");
  command.append(VirtualColumn.FILENAME.getName());
  command.append(",");
  command.append(" collect_set (");
  command.append(VirtualColumn.BLOCKOFFSET.getName());
  command.append(") ");
  command.append(" FROM " + HiveUtils.unparseIdentifier(dbName) + "." + HiveUtils.unparseIdentifier(baseTableName));
  LinkedHashMap<String, String> basePartSpec = baseTablePartDesc.getPartSpec();
  if (basePartSpec != null) {
    command.append(" WHERE ");
    List<String> pkv = getPartKVPairStringArray(basePartSpec);
    for (int i = 0; i < pkv.size(); i++) {
      String partKV = pkv.get(i);
      command.append(partKV);
      if (i < pkv.size() - 1) {
        command.append(" AND ");
      }
    }
  }
  command.append(" GROUP BY ");
  command.append(indexCols + ", " + VirtualColumn.FILENAME.getName());
  HiveConf builderConf = new HiveConf(getConf(), CompactIndexHandler.class);
  builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES, false);
  builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES, false);
  builderConf.setBoolVar(HiveConf.ConfVars.HIVEMERGETEZFILES, false);
  Task<?> rootTask = IndexUtils.createRootTask(builderConf, inputs, outputs, command, partSpec, indexTableName, dbName);
  return rootTask;
}
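For illustration only: with a hypothetical base table default.sales indexed on column id and partitioned by ds='2017-01-01', the builder above assembles a query of roughly the following shape. The index table name and all identifiers here are made-up examples, and the backtick quoting stands in for what HiveUtils.unparseIdentifier produces.

public class IndexBuilderQuerySketch {
  public static void main(String[] args) {
    // Approximate shape of the generated index-build statement for the hypothetical table
    String query =
        "INSERT OVERWRITE TABLE `default`.`default__sales_sales_idx__`"
      + " PARTITION ( ds='2017-01-01' )"
      + " SELECT `id`, INPUT__FILE__NAME, collect_set (BLOCK__OFFSET__INSIDE__FILE)"
      + " FROM `default`.`sales` WHERE ds='2017-01-01'"
      + " GROUP BY `id`, INPUT__FILE__NAME";
    System.out.println(query);
  }
}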