Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
The class DagUtils, method createVertex.
private Vertex createVertex(JobConf conf, MergeJoinWork mergeJoinWork, LocalResource appJarLr,
    List<LocalResource> additionalLr, FileSystem fs, Path mrScratchDir, Context ctx,
    VertexType vertexType) throws Exception {
  Utilities.setMergeWork(conf, mergeJoinWork, mrScratchDir, false);
  if (mergeJoinWork.getMainWork() instanceof MapWork) {
    List<BaseWork> mapWorkList = mergeJoinWork.getBaseWorkList();
    MapWork mapWork = (MapWork) (mergeJoinWork.getMainWork());
    Vertex mergeVx = createVertex(conf, mapWork, appJarLr, additionalLr, fs, mrScratchDir, ctx, vertexType);
    conf.setClass("mapred.input.format.class", HiveInputFormat.class, InputFormat.class);
    // mapreduce.tez.input.initializer.serialize.event.payload should be set
    // to false when using this plug-in to avoid getting a serialized event at run-time.
    conf.setBoolean("mapreduce.tez.input.initializer.serialize.event.payload", false);
    for (int i = 0; i < mapWorkList.size(); i++) {
      mapWork = (MapWork) (mapWorkList.get(i));
      conf.set(TEZ_MERGE_CURRENT_MERGE_FILE_PREFIX, mapWork.getName());
      conf.set(Utilities.INPUT_NAME, mapWork.getName());
      LOG.info("Going through each work and adding MultiMRInput");
      mergeVx.addDataSource(mapWork.getName(),
          MultiMRInput.createConfigBuilder(conf, HiveInputFormat.class).build());
    }
    VertexManagerPluginDescriptor desc =
        VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName());
    // the +1 to the size is because of the main work.
    CustomVertexConfiguration vertexConf =
        new CustomVertexConfiguration(mergeJoinWork.getMergeJoinOperator().getConf().getNumBuckets(),
            vertexType, mergeJoinWork.getBigTableAlias(), mapWorkList.size() + 1);
    DataOutputBuffer dob = new DataOutputBuffer();
    vertexConf.write(dob);
    byte[] userPayload = dob.getData();
    desc.setUserPayload(UserPayload.create(ByteBuffer.wrap(userPayload)));
    mergeVx.setVertexManagerPlugin(desc);
    return mergeVx;
  } else {
    Vertex mergeVx = createVertex(conf, (ReduceWork) mergeJoinWork.getMainWork(),
        appJarLr, additionalLr, fs, mrScratchDir, ctx);
    return mergeVx;
  }
}
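The interesting detail in this path is how CustomVertexConfiguration reaches the CustomPartitionVertex plug-in: it is a Hadoop Writable serialized into a DataOutputBuffer, wrapped in a ByteBuffer, and attached to the vertex manager descriptor as a Tez UserPayload. Below is a minimal, self-contained sketch of that round trip using a hypothetical PayloadConf class (not part of Hive); the real code wraps dob.getData() directly, while the sketch wraps only the valid length.

  import java.io.DataInput;
  import java.io.DataOutput;
  import java.io.IOException;
  import java.nio.ByteBuffer;

  import org.apache.hadoop.io.DataInputBuffer;
  import org.apache.hadoop.io.DataOutputBuffer;
  import org.apache.hadoop.io.Writable;

  // Hypothetical payload class illustrating the Writable-to-ByteBuffer round trip
  // performed for CustomVertexConfiguration above.
  public class PayloadConf implements Writable {
    private int numBuckets;
    private String bigTableAlias;

    public PayloadConf() { }                     // no-arg constructor needed for readFields()

    public PayloadConf(int numBuckets, String bigTableAlias) {
      this.numBuckets = numBuckets;
      this.bigTableAlias = bigTableAlias;
    }

    @Override
    public void write(DataOutput out) throws IOException {
      out.writeInt(numBuckets);
      out.writeUTF(bigTableAlias);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
      numBuckets = in.readInt();
      bigTableAlias = in.readUTF();
    }

    // Serialize into the kind of ByteBuffer that is handed to UserPayload.create(...).
    public ByteBuffer toByteBuffer() throws IOException {
      DataOutputBuffer dob = new DataOutputBuffer();
      write(dob);
      // wrap only the valid bytes; dob.getData() can be longer than dob.getLength()
      return ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    }

    // Deserialize on the plug-in side from the raw payload bytes.
    public static PayloadConf fromBytes(byte[] bytes) throws IOException {
      DataInputBuffer dib = new DataInputBuffer();
      dib.reset(bytes, bytes.length);
      PayloadConf conf = new PayloadConf();
      conf.readFields(dib);
      return conf;
    }
  }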
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
The class SparkMergeFileRecordHandler, method init.
@SuppressWarnings("unchecked")
@Override
public <K, V> void init(JobConf job, OutputCollector<K, V> output, Reporter reporter) throws Exception {
  super.init(job, output, reporter);
  try {
    jc = job;
    MapWork mapWork = Utilities.getMapWork(job);
    if (mapWork instanceof MergeFileWork) {
      MergeFileWork mergeFileWork = (MergeFileWork) mapWork;
      String alias = mergeFileWork.getAliasToWork().keySet().iterator().next();
      op = mergeFileWork.getAliasToWork().get(alias);
      if (op instanceof AbstractFileMergeOperator) {
        mergeOp = (AbstractFileMergeOperator<? extends FileMergeDesc>) op;
        mergeOp.initializeOp(jc);
        row = new Object[2];
        abort = false;
      } else {
        abort = true;
        throw new IllegalStateException("Merge file work's top operator should be an"
            + " instance of AbstractFileMergeOperator");
      }
    } else {
      abort = true;
      throw new IllegalStateException("Map work should be a merge file work.");
    }
    LOG.info(mergeOp.dump(0));
  } catch (HiveException e) {
    abort = true;
    throw new RuntimeException(e);
  }
}
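The init() method above follows a common guarded-downcast pattern: deserialize the plan from the job configuration, take the single top operator registered under the one alias, and fail fast if it is not the expected type. A minimal sketch of that lookup, with hypothetical Work/Op stand-ins rather than Hive's MergeFileWork and AbstractFileMergeOperator:

  import java.util.LinkedHashMap;
  import java.util.Map;

  public class TopOperatorLookup {

    interface Op { }
    static class FileMergeOp implements Op { }   // hypothetical stand-in operator type

    // Return the single top operator if it has the expected type, otherwise fail fast.
    static FileMergeOp topMergeOperator(Map<String, Op> aliasToWork) {
      if (aliasToWork.size() != 1) {
        throw new IllegalStateException("Expected exactly one alias, found " + aliasToWork.size());
      }
      Op op = aliasToWork.values().iterator().next();
      if (!(op instanceof FileMergeOp)) {
        throw new IllegalStateException(
            "Top operator should be a FileMergeOp but was " + op.getClass().getName());
      }
      return (FileMergeOp) op;
    }

    public static void main(String[] args) {
      Map<String, Op> aliasToWork = new LinkedHashMap<>();
      aliasToWork.put("t1", new FileMergeOp());
      System.out.println(topMergeOperator(aliasToWork).getClass().getSimpleName());
    }
  }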
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
The class SparkPlanGenerator, method generateParentTran.
// Generate (possibly get from a cached result) parent SparkTran
private SparkTran generateParentTran(SparkPlan sparkPlan, SparkWork sparkWork, BaseWork work) throws Exception {
  if (cloneToWork.containsKey(work)) {
    BaseWork originalWork = cloneToWork.get(work);
    if (workToParentWorkTranMap.containsKey(originalWork)) {
      return workToParentWorkTranMap.get(originalWork);
    }
  }
  SparkTran result;
  if (work instanceof MapWork) {
    result = generateMapInput(sparkPlan, (MapWork) work);
    sparkPlan.addTran(result);
  } else if (work instanceof ReduceWork) {
    List<BaseWork> parentWorks = sparkWork.getParents(work);
    result = generate(sparkPlan, sparkWork.getEdgeProperty(parentWorks.get(0), work),
        cloneToWork.containsKey(work));
    sparkPlan.addTran(result);
    for (BaseWork parentWork : parentWorks) {
      sparkPlan.connect(workToTranMap.get(parentWork), result);
    }
  } else {
    throw new IllegalStateException("AssertionError: expected either MapWork or ReduceWork, "
        + "but found " + work.getClass().getName());
  }
  if (cloneToWork.containsKey(work)) {
    workToParentWorkTranMap.put(cloneToWork.get(work), result);
  }
  return result;
}
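Reuse here hinges on two maps: cloneToWork maps a cloned work back to its original, and workToParentWorkTranMap caches the SparkTran already generated for that original, so clones share a single parent tran. A minimal sketch of that cache-then-generate pattern with hypothetical generic types (not Hive's classes):

  import java.util.HashMap;
  import java.util.Map;
  import java.util.function.Function;

  // Hypothetical cache keyed by the original (pre-clone) work, mirroring the
  // cloneToWork / workToParentWorkTranMap interplay above.
  public class TranCache<W, T> {
    private final Map<W, W> cloneToOriginal = new HashMap<>();
    private final Map<W, T> originalToTran = new HashMap<>();

    public void registerClone(W clone, W original) {
      cloneToOriginal.put(clone, original);
    }

    // Reuse the tran generated for the original work; otherwise generate and cache it.
    public T getOrGenerate(W work, Function<W, T> generator) {
      W original = cloneToOriginal.get(work);
      if (original != null && originalToTran.containsKey(original)) {
        return originalToTran.get(original);
      }
      T result = generator.apply(work);
      if (original != null) {
        originalToTran.put(original, result);
      }
      return result;
    }
  }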
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
The class SplitGrouper, method generateGroupedSplits.
/** Generate groups of splits, separated by schema evolution boundaries. */
public Multimap<Integer, InputSplit> generateGroupedSplits(JobConf jobConf, Configuration conf,
    InputSplit[] splits, float waves, int availableSlots, String inputName,
    boolean groupAcrossFiles, SplitLocationProvider locationProvider) throws Exception {
  MapWork work = populateMapWork(jobConf, inputName);
  // ArrayListMultimap is important here to retain the ordering for the splits.
  Multimap<Integer, InputSplit> bucketSplitMultiMap = ArrayListMultimap.<Integer, InputSplit>create();
  int i = 0;
  InputSplit prevSplit = null;
  for (InputSplit s : splits) {
    // schema boundaries
    if (schemaEvolved(s, prevSplit, groupAcrossFiles, work)) {
      ++i;
      prevSplit = s;
    }
    bucketSplitMultiMap.put(i, s);
  }
  LOG.info("# Src groups for split generation: " + (i + 1));
  // group them into the chunks we want
  Multimap<Integer, InputSplit> groupedSplits =
      this.group(jobConf, bucketSplitMultiMap, availableSlots, waves, locationProvider);
  return groupedSplits;
}
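The grouping idea is straightforward: walk the splits in order, bump the group index whenever a schema-evolution boundary is crossed, and collect splits per index in an order-preserving multimap. A minimal sketch of the same idea, using Guava's ArrayListMultimap and a hypothetical isBoundary() predicate standing in for schemaEvolved():

  import com.google.common.collect.ArrayListMultimap;
  import com.google.common.collect.Multimap;

  import java.util.List;
  import java.util.function.BiPredicate;

  public class BoundaryGrouper<T> {

    // Assign consecutive items to the same group until the boundary predicate fires.
    public Multimap<Integer, T> group(List<T> items, BiPredicate<T, T> isBoundary) {
      Multimap<Integer, T> groups = ArrayListMultimap.create();  // preserves insertion order per key
      int groupId = 0;
      T prev = null;
      for (T item : items) {
        if (prev != null && isBoundary.test(prev, item)) {
          ++groupId;                 // start a new group at each boundary
        }
        groups.put(groupId, item);
        prev = item;
      }
      return groups;
    }
  }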
Use of org.apache.hadoop.hive.ql.plan.MapWork in project haivvreo by jghoman.
The class AvroSerDe, method determineCorrectProperties.
// Hive passes different properties in at different times. If we're in an MR job,
// we'll get properties for the partition rather than the table, which will give
// us old values for the schema (if it's evolved). Therefore, in an MR job
// we need to extract the table properties.
// Also, in join queries, multiple properties will be included, so we need
// to extract the one appropriate to the table we're serde'ing.
private Properties determineCorrectProperties(Configuration configuration, Properties properties) {
  if ((configuration instanceof JobConf) && HaivvreoUtils.insideMRJob((JobConf) configuration)) {
    LOG.info("In MR job, extracting table-level properties");
    MapWork mapWork = Utilities.getMapWork(configuration);
    LinkedHashMap<String, PartitionDesc> a = mapWork.getAliasToPartnInfo();
    if (a.size() == 1) {
      LOG.info("Only one PartitionDesc found. Returning that Properties");
      PartitionDesc p = a.values().iterator().next();
      TableDesc tableDesc = p.getTableDesc();
      return tableDesc.getProperties();
    } else {
      String tableName = properties.getProperty("name");
      LOG.info("Multiple PartitionDescs. Return properties for " + tableName);
      for (Map.Entry<String, PartitionDesc> partitionDescs : a.entrySet()) {
        Properties p = partitionDescs.getValue().getTableDesc().getProperties();
        if (p.get("name").equals(tableName)) {
          // We've found the matching table partition
          LOG.info("Matched table name against " + partitionDescs.getKey() + ", return its properties");
          return p;
        }
      }
      // Didn't find anything in partitions to match on. WARN, at least.
      LOG.warn("Couldn't find any matching properties for table: " + tableName + ". Returning original properties");
    }
  }
  return properties;
}
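Stripped of the Hive plan classes, the selection logic is: if there is exactly one candidate, use it; otherwise match candidates against the table's "name" property and fall back to the original properties when nothing matches. A minimal sketch with plain java.util.Properties and a hypothetical alias-to-Properties map in place of PartitionDesc/TableDesc:

  import java.util.LinkedHashMap;
  import java.util.Map;
  import java.util.Properties;

  public class TablePropertiesResolver {

    // Fall back to the original properties when no candidate matches.
    static Properties resolve(Map<String, Properties> candidates, Properties original) {
      if (candidates.size() == 1) {
        return candidates.values().iterator().next();
      }
      String tableName = original.getProperty("name");
      for (Map.Entry<String, Properties> e : candidates.entrySet()) {
        Properties p = e.getValue();
        if (tableName != null && tableName.equals(p.getProperty("name"))) {
          return p;                  // found the candidate describing the table being serde'd
        }
      }
      return original;
    }

    public static void main(String[] args) {
      Properties original = new Properties();
      original.setProperty("name", "db.sales");
      Map<String, Properties> candidates = new LinkedHashMap<>();
      Properties sales = new Properties();
      sales.setProperty("name", "db.sales");
      candidates.put("alias1", sales);
      System.out.println(resolve(candidates, original).getProperty("name"));
    }
  }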