Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.
From the class TestOperators, method testScriptOperator.
public void testScriptOperator() throws Throwable {
  try {
    System.out.println("Testing Script Operator");
    // col1
    ExprNodeDesc exprDesc1 = TestExecDriver.getStringColumn("col1");
    // col2: concat(col0, "1")
    ExprNodeDesc expr1 = TestExecDriver.getStringColumn("col0");
    ExprNodeDesc expr2 = new ExprNodeConstantDesc("1");
    ExprNodeDesc exprDesc2 =
        TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("concat", expr1, expr2);
    // select operator to project these two columns
    ArrayList<ExprNodeDesc> earr = new ArrayList<ExprNodeDesc>();
    earr.add(exprDesc1);
    earr.add(exprDesc2);
    ArrayList<String> outputCols = new ArrayList<String>();
    for (int i = 0; i < earr.size(); i++) {
      outputCols.add("_col" + i);
    }
    SelectDesc selectCtx = new SelectDesc(earr, outputCols);
    Operator<SelectDesc> op = OperatorFactory.get(new CompilationOpContext(), SelectDesc.class);
    op.setConf(selectCtx);
    // script operator to echo the output of the select
    TableDesc scriptOutput = PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "a,b");
    TableDesc scriptInput = PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "a,b");
    ScriptDesc sd = new ScriptDesc("cat", scriptOutput, TextRecordWriter.class, scriptInput,
        TextRecordReader.class, TextRecordReader.class,
        PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key"));
    Operator<ScriptDesc> sop = OperatorFactory.getAndMakeChild(sd, op);
    // Collect operator to observe the output of the script
    CollectDesc cd = new CollectDesc(Integer.valueOf(10));
    CollectOperator cdop = (CollectOperator) OperatorFactory.getAndMakeChild(cd, sop);
    op.initialize(new JobConf(TestOperators.class), new ObjectInspector[] { r[0].oi });
    // evaluate on rows
    for (int i = 0; i < 5; i++) {
      op.process(r[i].o, 0);
    }
    op.close(false);
    InspectableObject io = new InspectableObject();
    for (int i = 0; i < 5; i++) {
      cdop.retrieve(io);
      System.out.println("[" + i + "] io.o=" + io.o);
      System.out.println("[" + i + "] io.oi=" + io.oi);
      StructObjectInspector soi = (StructObjectInspector) io.oi;
      assert (soi != null);
      StructField a = soi.getStructFieldRef("a");
      StructField b = soi.getStructFieldRef("b");
      assertEquals("" + (i + 1), ((PrimitiveObjectInspector) a.getFieldObjectInspector())
          .getPrimitiveJavaObject(soi.getStructFieldData(io.o, a)));
      assertEquals((i) + "1", ((PrimitiveObjectInspector) b.getFieldObjectInspector())
          .getPrimitiveJavaObject(soi.getStructFieldData(io.o, b)));
    }
    System.out.println("Script Operator ok");
  } catch (Throwable e) {
    e.printStackTrace();
    throw e;
  }
}
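The pattern worth noting above: every operator in the chain hangs off a single CompilationOpContext, either passed explicitly to OperatorFactory.get or inherited through OperatorFactory.getAndMakeChild. A minimal sketch of just that wiring, reusing only calls that appear in the test (the column name "col0" and the collect buffer size are illustrative placeholders):

// Minimal sketch: one CompilationOpContext per compiled plan.
CompilationOpContext opCtx = new CompilationOpContext();

// The root operator is created directly against the context...
ArrayList<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
cols.add(TestExecDriver.getStringColumn("col0"));
ArrayList<String> names = new ArrayList<String>();
names.add("_col0");
Operator<SelectDesc> select = OperatorFactory.get(opCtx, SelectDesc.class);
select.setConf(new SelectDesc(cols, names));

// ...while children created via getAndMakeChild inherit it from the parent,
// so operator IDs stay unique within the plan.
CollectOperator collect = (CollectOperator)
    OperatorFactory.getAndMakeChild(new CollectDesc(Integer.valueOf(10)), select);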
Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.
From the class TestOperators, method testMapOperator.
public void testMapOperator() throws Throwable {
  try {
    System.out.println("Testing Map Operator");
    // initialize configuration
    JobConf hconf = new JobConf(TestOperators.class);
    hconf.set(MRJobConfig.MAP_INPUT_FILE, "hdfs:///testDir/testFile");
    IOContextMap.get(hconf).setInputPath(new Path("hdfs:///testDir/testFile"));
    // initialize pathToAliases
    ArrayList<String> aliases = new ArrayList<String>();
    aliases.add("a");
    aliases.add("b");
    LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
    pathToAliases.put(new Path("hdfs:///testDir"), aliases);
    // initialize pathToTableInfo
    // Default: treat the table as a single column "col"
    TableDesc td = Utilities.defaultTd;
    PartitionDesc pd = new PartitionDesc(td, null);
    LinkedHashMap<Path, org.apache.hadoop.hive.ql.plan.PartitionDesc> pathToPartitionInfo =
        new LinkedHashMap<>();
    pathToPartitionInfo.put(new Path("hdfs:///testDir"), pd);
    // initialize aliasToWork
    CompilationOpContext ctx = new CompilationOpContext();
    CollectDesc cd = new CollectDesc(Integer.valueOf(1));
    CollectOperator cdop1 = (CollectOperator) OperatorFactory.get(ctx, CollectDesc.class);
    cdop1.setConf(cd);
    CollectOperator cdop2 = (CollectOperator) OperatorFactory.get(ctx, CollectDesc.class);
    cdop2.setConf(cd);
    LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork =
        new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
    aliasToWork.put("a", cdop1);
    aliasToWork.put("b", cdop2);
    // initialize mapredWork
    MapredWork mrwork = new MapredWork();
    mrwork.getMapWork().setPathToAliases(pathToAliases);
    mrwork.getMapWork().setPathToPartitionInfo(pathToPartitionInfo);
    mrwork.getMapWork().setAliasToWork(aliasToWork);
    // get map operator and initialize it
    MapOperator mo = new MapOperator(new CompilationOpContext());
    mo.initializeAsRoot(hconf, mrwork.getMapWork());
    Text tw = new Text();
    InspectableObject io1 = new InspectableObject();
    InspectableObject io2 = new InspectableObject();
    for (int i = 0; i < 5; i++) {
      String answer = "[[" + i + ", " + (i + 1) + ", " + (i + 2) + "]]";
      // fields separated by ctrl-A (\u0001), the default Hive field delimiter
      tw.set("" + i + "\u0001" + (i + 1) + "\u0001" + (i + 2));
      mo.process(tw);
      cdop1.retrieve(io1);
      cdop2.retrieve(io2);
      System.out.println("io1.o.toString() = " + io1.o.toString());
      System.out.println("io2.o.toString() = " + io2.o.toString());
      System.out.println("answer.toString() = " + answer.toString());
      assertEquals(answer.toString(), io1.o.toString());
      assertEquals(answer.toString(), io2.o.toString());
    }
    System.out.println("Map Operator ok");
  } catch (Throwable e) {
    e.printStackTrace();
    throw e;
  }
}
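In condensed form, the moving parts of this test: the MapWork maps an input path to aliases, each alias to an operator tree, and a MapOperator built with its own CompilationOpContext sits on top as the root. A restatement of just those core steps, assuming the same local variables as above:

// Condensed wiring: path -> aliases -> operator trees, then a MapOperator on top.
MapredWork mrwork = new MapredWork();
mrwork.getMapWork().setPathToAliases(pathToAliases);             // hdfs:///testDir -> [a, b]
mrwork.getMapWork().setPathToPartitionInfo(pathToPartitionInfo); // how to deserialize rows
mrwork.getMapWork().setAliasToWork(aliasToWork);                 // a -> cdop1, b -> cdop2

MapOperator mo = new MapOperator(new CompilationOpContext());
mo.initializeAsRoot(hconf, mrwork.getMapWork()); // each mo.process(row) fans out to every alias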
Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.
From the class TestGenTezWork, method setUp.
/**
 * @throws java.lang.Exception
 */
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
  // Init conf
  final HiveConf conf = new HiveConf(SemanticAnalyzer.class);
  SessionState.start(conf);
  // Init parse context
  final ParseContext pctx = new ParseContext();
  pctx.setContext(new Context(conf));
  ctx = new GenTezProcContext(conf, pctx, Collections.EMPTY_LIST,
      new ArrayList<Task<? extends Serializable>>(), Collections.EMPTY_SET, Collections.EMPTY_SET);
  proc = new GenTezWork(new GenTezUtils() {

    @Override
    protected void setupMapWork(MapWork mapWork, GenTezProcContext context,
        PrunedPartitionList partitions, TableScanOperator root, String alias)
        throws SemanticException {
      LinkedHashMap<String, Operator<? extends OperatorDesc>> map =
          new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
      map.put("foo", root);
      mapWork.setAliasToWork(map);
      return;
    }
  });
  CompilationOpContext cCtx = new CompilationOpContext();
  fs = new FileSinkOperator(cCtx);
  fs.setConf(new FileSinkDesc());
  rs = new ReduceSinkOperator(cCtx);
  rs.setConf(new ReduceSinkDesc());
  TableDesc tableDesc = new TableDesc();
  tableDesc.setProperties(new Properties());
  rs.getConf().setKeySerializeInfo(tableDesc);
  ts = new TableScanOperator(cCtx);
  ts.setConf(new TableScanDesc(null));
  ts.getChildOperators().add(rs);
  rs.getParentOperators().add(ts);
  rs.getChildOperators().add(fs);
  fs.getParentOperators().add(rs);
  ctx.preceedingWork = null;
  ctx.currentRootOperator = ts;
}
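One detail worth calling out: the TS, RS, and FS operators all share the same CompilationOpContext (cCtx), and parent/child links are maintained symmetrically on both operators, which the test wires by hand. A small hypothetical helper (not part of Hive's API) makes the symmetry explicit:

// Hypothetical helper, not in Hive: link a parent/child pair the way setUp() does manually.
static void connect(Operator<? extends OperatorDesc> parent,
    Operator<? extends OperatorDesc> child) {
  parent.getChildOperators().add(child);
  child.getParentOperators().add(parent);
}

// Equivalent to the four add() calls above:
// connect(ts, rs);
// connect(rs, fs);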
Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.
From the class SparkMapRecordHandler, method init.
@Override
public <K, V> void init(JobConf job, OutputCollector<K, V> output, Reporter reporter)
    throws Exception {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
  super.init(job, output, reporter);
  isLogInfoEnabled = LOG.isInfoEnabled();
  try {
    jc = job;
    execContext = new ExecMapperContext(jc);
    // create map and fetch operators
    MapWork mrwork = Utilities.getMapWork(job);
    CompilationOpContext runtimeCtx = new CompilationOpContext();
    if (mrwork.getVectorMode()) {
      mo = new VectorMapOperator(runtimeCtx);
    } else {
      mo = new MapOperator(runtimeCtx);
    }
    mo.setConf(mrwork);
    // initialize map operator
    mo.initialize(jc, null);
    mo.setChildren(job);
    LOG.info(mo.dump(0));
    // initialize map local work
    localWork = mrwork.getMapRedLocalWork();
    execContext.setLocalWork(localWork);
    MapredContext.init(true, new JobConf(jc));
    MapredContext.get().setReporter(reporter);
    mo.passExecContext(execContext);
    mo.initializeLocalWork(jc);
    mo.initializeMapOperator(jc);
    OperatorUtils.setChildrenCollector(mo.getChildOperators(), output);
    mo.setReporter(rp);
    if (localWork == null) {
      return;
    }
    // The following code is for mapjoin: initialize all the dummy ops.
    LOG.info("Initializing dummy operator");
    List<Operator<? extends OperatorDesc>> dummyOps = localWork.getDummyParentOp();
    for (Operator<? extends OperatorDesc> dummyOp : dummyOps) {
      dummyOp.setExecContext(execContext);
      dummyOp.initialize(jc, null);
    }
  } catch (Throwable e) {
    abort = true;
    if (e instanceof OutOfMemoryError) {
      // Don't create a new object if we are already out of memory
      throw (OutOfMemoryError) e;
    } else {
      throw new RuntimeException("Map operator initialization failed: " + e, e);
    }
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
}
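The branch on mrwork.getVectorMode() is the key decision here: the same plan can run with a row-mode MapOperator or a VectorMapOperator, and either way the operator is built against a fresh runtime CompilationOpContext rather than the compile-time one. A minimal sketch of that choice, assuming (as this version of the code does) that VectorMapOperator can be assigned to the same field as MapOperator:

// Sketch: pick the operator implementation from the plan's vector-mode flag.
MapWork mrwork = Utilities.getMapWork(job);
CompilationOpContext runtimeCtx = new CompilationOpContext();
MapOperator mo = mrwork.getVectorMode()
    ? new VectorMapOperator(runtimeCtx)  // processes vectorized row batches
    : new MapOperator(runtimeCtx);       // processes one row at a time
mo.setConf(mrwork);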
Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.
From the class PartialScanTask, method main.
public static void main(String[] args) {
  String inputPathStr = null;
  String outputDir = null;
  String jobConfFileName = null;
  try {
    for (int i = 0; i < args.length; i++) {
      if (args[i].equals("-input")) {
        inputPathStr = args[++i];
      } else if (args[i].equals("-jobconffile")) {
        jobConfFileName = args[++i];
      } else if (args[i].equals("-outputDir")) {
        outputDir = args[++i];
      }
    }
  } catch (IndexOutOfBoundsException e) {
    System.err.println("Missing argument to option");
    printUsage();
  }
  if (inputPathStr == null || outputDir == null || outputDir.trim().equals("")) {
    printUsage();
  }
  List<Path> inputPaths = new ArrayList<Path>();
  String[] paths = inputPathStr.split(INPUT_SEPERATOR);
  if (paths == null || paths.length == 0) {
    printUsage();
  }
  FileSystem fs = null;
  JobConf conf = new JobConf(PartialScanTask.class);
  for (String path : paths) {
    try {
      Path pathObj = new Path(path);
      if (fs == null) {
        fs = FileSystem.get(pathObj.toUri(), conf);
      }
      FileStatus fstatus = fs.getFileStatus(pathObj);
      if (fstatus.isDir()) {
        FileStatus[] fileStatus = fs.listStatus(pathObj);
        for (FileStatus st : fileStatus) {
          inputPaths.add(st.getPath());
        }
      } else {
        inputPaths.add(fstatus.getPath());
      }
    } catch (IOException e) {
      e.printStackTrace(System.err);
    }
  }
  if (jobConfFileName != null) {
    conf.addResource(new Path(jobConfFileName));
  }
  org.slf4j.Logger LOG = LoggerFactory.getLogger(PartialScanTask.class.getName());
  boolean isSilent = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVESESSIONSILENT);
  LogHelper console = new LogHelper(LOG, isSilent);
  // print the location of the execution log for the user so
  // that it's easy to find reasons for local mode execution failures
  for (Appender appender : ((Logger) LogManager.getRootLogger()).getAppenders().values()) {
    if (appender instanceof FileAppender) {
      console.printInfo("Execution log at: " + ((FileAppender) appender).getFileName());
    } else if (appender instanceof RollingFileAppender) {
      console.printInfo("Execution log at: " + ((RollingFileAppender) appender).getFileName());
    }
  }
  QueryState queryState = new QueryState(new HiveConf(conf, PartialScanTask.class));
  PartialScanWork mergeWork = new PartialScanWork(inputPaths);
  DriverContext driverCxt = new DriverContext();
  PartialScanTask taskExec = new PartialScanTask();
  taskExec.initialize(queryState, null, driverCxt, new CompilationOpContext());
  taskExec.setWork(mergeWork);
  int ret = taskExec.execute(driverCxt);
  if (ret != 0) {
    System.exit(2);
  }
}
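Stripped of argument parsing and logging, the end of main() follows Hive's standard Task lifecycle: build a QueryState, initialize the task with a fresh CompilationOpContext, attach the work, and execute. A condensed restatement, assuming conf and inputPaths were populated as above:

// Condensed Task lifecycle from main() above.
QueryState queryState = new QueryState(new HiveConf(conf, PartialScanTask.class));
DriverContext driverCxt = new DriverContext();
PartialScanTask taskExec = new PartialScanTask();
taskExec.initialize(queryState, null, driverCxt, new CompilationOpContext());
taskExec.setWork(new PartialScanWork(inputPaths));
int ret = taskExec.execute(driverCxt); // non-zero means failure; main() exits with 2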