Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
The class LlapRecordReader, method create.
/**
* Creates the record reader and checks the input-specific compatibility.
* @return The reader if the split can be read, null otherwise.
*/
public static LlapRecordReader create(JobConf job, FileSplit split,
    List<Integer> tableIncludedCols, String hostName, ColumnVectorProducer cvp,
    ExecutorService executor, InputFormat<?, ?> sourceInputFormat, Deserializer sourceSerDe,
    Reporter reporter, Configuration daemonConf) throws IOException, HiveException {
  MapWork mapWork = findMapWork(job);
  // No compatible MapWork.
  if (mapWork == null) {
    return null;
  }
  LlapRecordReader rr = new LlapRecordReader(mapWork, job, split, tableIncludedCols,
      hostName, cvp, executor, sourceInputFormat, sourceSerDe, reporter, daemonConf);
  if (!rr.checkOrcSchemaEvolution()) {
    rr.close();
    return null;
  }
  return rr;
}
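Because create returns null when LLAP cannot read the split, the caller is expected to fall back to a regular reader. A minimal caller sketch; createFallbackReader is a hypothetical helper, not part of the source:

// Hypothetical caller: fall back when LLAP cannot handle the split.
LlapRecordReader reader = LlapRecordReader.create(job, split, tableIncludedCols,
    hostName, cvp, executor, sourceInputFormat, sourceSerDe, reporter, daemonConf);
if (reader == null) {
  // Either no compatible MapWork was found or ORC schema evolution check failed.
  return createFallbackReader(job, split); // assumed helper, not part of the source
}
return reader;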
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
The class VectorizedColumnReaderTestBase, method initialVectorizedRowBatchCtx.
protected static void initialVectorizedRowBatchCtx(Configuration conf) throws HiveException {
  MapWork mapWork = new MapWork();
  VectorizedRowBatchCtx rbCtx = new VectorizedRowBatchCtx();
  rbCtx.init(createStructObjectInspector(conf), new String[0]);
  mapWork.setVectorMode(true);
  mapWork.setVectorizedRowBatchCtx(rbCtx);
  Utilities.setMapWork(conf, mapWork);
}
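The MapWork placed into the Configuration by Utilities.setMapWork can be read back with Utilities.getMapWork, so a test can verify the round trip. A small sketch, assuming the Configuration already carries whatever schema properties createStructObjectInspector expects:

Configuration conf = new Configuration();
// ... populate conf with the column names/types the test base expects ...
initialVectorizedRowBatchCtx(conf);
// Retrieve what setMapWork stored in the conf and check the vectorization flags.
MapWork retrieved = Utilities.getMapWork(conf);
assertTrue(retrieved.getVectorMode());
assertNotNull(retrieved.getVectorizedRowBatchCtx());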
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
The class TestGenTezWork, method setUp.
/**
* @throws java.lang.Exception
*/
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
  // Init conf
  final HiveConf conf = new HiveConf(SemanticAnalyzer.class);
  SessionState.start(conf);
  // Init parse context
  final ParseContext pctx = new ParseContext();
  pctx.setContext(new Context(conf));
  ctx = new GenTezProcContext(conf, pctx, Collections.EMPTY_LIST,
      new ArrayList<Task<? extends Serializable>>(), Collections.EMPTY_SET, Collections.EMPTY_SET);
  proc = new GenTezWork(new GenTezUtils() {

    @Override
    protected void setupMapWork(MapWork mapWork, GenTezProcContext context,
        PrunedPartitionList partitions, TableScanOperator root, String alias)
        throws SemanticException {
      LinkedHashMap<String, Operator<? extends OperatorDesc>> map =
          new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
      map.put("foo", root);
      mapWork.setAliasToWork(map);
    }
  });
  CompilationOpContext cCtx = new CompilationOpContext();
  fs = new FileSinkOperator(cCtx);
  fs.setConf(new FileSinkDesc());
  rs = new ReduceSinkOperator(cCtx);
  rs.setConf(new ReduceSinkDesc());
  TableDesc tableDesc = new TableDesc();
  tableDesc.setProperties(new Properties());
  rs.getConf().setKeySerializeInfo(tableDesc);
  ts = new TableScanOperator(cCtx);
  ts.setConf(new TableScanDesc(null));
  // Wire a simple TS -> RS -> FS operator pipeline.
  ts.getChildOperators().add(rs);
  rs.getParentOperators().add(ts);
  rs.getChildOperators().add(fs);
  fs.getParentOperators().add(rs);
  ctx.preceedingWork = null;
  ctx.currentRootOperator = ts;
}
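With the TS -> RS -> FS pipeline in place, a test can drive the processor over the reduce sink and inspect the resulting context. A hedged sketch; the NodeProcessor calling convention and the ctx fields are assumed from the Hive codebase:

@Test
public void testCreatesMapWork() throws SemanticException {
  // Process the ReduceSinkOperator: GenTezWork should cut the plan here and
  // generate a MapWork covering the operators rooted at the table scan.
  proc.process(rs, null, ctx, (Object[]) null);
  assertNotNull(ctx.currentTask);
}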
Use of org.apache.hadoop.hive.ql.plan.MapWork in project haivvreo by jghoman.
The class AvroSerDe, method determineCorrectProperties.
// Hive passes different properties in at different times. If we're in an MR job,
// we'll get properties for the partition rather than the table, which will give
// us stale values for the schema (if it has evolved). Therefore, in an MR job
// we need to extract the table properties.
// Also, in join queries multiple tables' properties will be included, so we need
// to extract the ones appropriate to the table we're serializing/deserializing.
private Properties determineCorrectProperties(Configuration configuration, Properties properties) {
  if ((configuration instanceof JobConf) && HaivvreoUtils.insideMRJob((JobConf) configuration)) {
    LOG.info("In MR job, extracting table-level properties");
    MapWork mapWork = Utilities.getMapWork(configuration);
    LinkedHashMap<String, PartitionDesc> aliasToPartnInfo = mapWork.getAliasToPartnInfo();
    if (aliasToPartnInfo.size() == 1) {
      LOG.info("Only one PartitionDesc found. Returning its Properties");
      PartitionDesc p = aliasToPartnInfo.values().iterator().next();
      TableDesc tableDesc = p.getTableDesc();
      return tableDesc.getProperties();
    } else {
      String tableName = properties.getProperty("name");
      LOG.info("Multiple PartitionDescs. Returning properties for " + tableName);
      for (Map.Entry<String, PartitionDesc> partitionDescs : aliasToPartnInfo.entrySet()) {
        Properties p = partitionDescs.getValue().getTableDesc().getProperties();
        if (p.get("name").equals(tableName)) {
          // We've found the matching table partition.
          LOG.info("Matched table name against " + partitionDescs.getKey() + ", returning its properties");
          return p;
        }
      }
      // Didn't find anything in the partitions to match on; warn and fall through.
      LOG.warn("Couldn't find any matching properties for table: " + tableName + ". Returning original properties");
    }
  }
  return properties;
}
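In the serde itself, this method would typically be applied at initialization time so the schema is always derived from table-level properties. A hedged sketch; the real initialize does more work than shown here:

@Override
public void initialize(Configuration configuration, Properties properties) throws SerDeException {
  // Resolve table-level properties first so an evolved schema is not shadowed
  // by stale partition-level properties (sketch only).
  Properties tableProperties = determineCorrectProperties(configuration, properties);
  // ... derive the Avro schema and object inspector from tableProperties ...
}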
Use of org.apache.hadoop.hive.ql.plan.MapWork in project hive by apache.
The class DagUtils, method createVertex.
/**
* Create a vertex from a given work object.
*
* @param conf JobConf to be used for this execution unit
* @param work The instance of BaseWork representing the actual work to be performed
* by this vertex.
* @param scratchDir HDFS scratch dir for this execution unit.
* @param appJarLr Local resource for hive-exec.
* @param additionalLr Additional local resources needed by this execution unit.
* @param fileSystem FS corresponding to scratchDir and LocalResources
* @param ctx This query's context
* @return Vertex
*/
@SuppressWarnings("deprecation")
public Vertex createVertex(JobConf conf, BaseWork work, Path scratchDir, LocalResource appJarLr,
    List<LocalResource> additionalLr, FileSystem fileSystem, Context ctx, boolean hasChildren,
    TezWork tezWork, VertexType vertexType) throws Exception {
  Vertex v = null;
  // Dispatch on the concrete BaseWork subtype.
  if (work instanceof MapWork) {
    v = createVertex(conf, (MapWork) work, appJarLr, additionalLr, fileSystem, scratchDir, ctx, vertexType);
  } else if (work instanceof ReduceWork) {
    v = createVertex(conf, (ReduceWork) work, appJarLr, additionalLr, fileSystem, scratchDir, ctx);
  } else if (work instanceof MergeJoinWork) {
    v = createVertex(conf, (MergeJoinWork) work, appJarLr, additionalLr, fileSystem, scratchDir, ctx, vertexType);
  } else {
    // Something is seriously wrong if this is happening.
    throw new HiveException(ErrorMsg.GENERIC_ERROR.getErrorCodedMsg());
  }
  // Initialize the stats publisher if this work unit gathers statistics.
  if (work.isGatheringStats()) {
    StatsPublisher statsPublisher;
    StatsFactory factory = StatsFactory.newFactory(conf);
    if (factory != null) {
      StatsCollectionContext sCntxt = new StatsCollectionContext(conf);
      sCntxt.setStatsTmpDirs(Utilities.getStatsTmpDirs(work, conf));
      statsPublisher = factory.getStatsPublisher();
      if (!statsPublisher.init(sCntxt)) {
        // init creates the stats table if it does not exist; failure is fatal
        // only when reliable stats are required.
        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
          throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
        }
      }
    }
  }
  // Final vertices need to have at least one output.
  if (!hasChildren) {
    v.addDataSink("out_" + work.getName(),
        new DataSinkDescriptor(OutputDescriptor.create(MROutput.class.getName())
            .setUserPayload(TezUtils.createUserPayloadFromConf(conf)), null, null));
  }
  return v;
}
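A DAG builder would typically call createVertex once per BaseWork in the TezWork graph, in dependency order, and add each result to the DAG. A simplified sketch; edge creation is omitted and the TezWork accessors (getAllWork, getChildren, getVertexType) are assumed from the Hive codebase:

// Simplified DAG-construction loop; edge wiring between vertices is omitted.
for (BaseWork w : tezWork.getAllWork()) {
  boolean hasChildren = !tezWork.getChildren(w).isEmpty();
  Vertex vertex = dagUtils.createVertex(conf, w, scratchDir, appJarLr,
      additionalLr, fileSystem, ctx, hasChildren, tezWork, tezWork.getVertexType(w));
  dag.addVertex(vertex);
}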