Use of org.apache.hadoop.hive.ql.plan.BaseWork in the Apache Hive project.
From the class TestTezTask, method setUp.
/**
 * Builds the fixture used by the tests in this class.
 *
 * <p>A mocked {@code DagUtils} hands out real Tez vertices and edges, a {@code TezWork}
 * is assembled from two map works and two reduce works (map 0 and map 1 feed reduce 0,
 * which feeds reduce 1), and a {@code TezTask} is wired to that work. The mocked Tez
 * session is stubbed so that the first {@code submitDAG} call throws
 * {@code SessionNotRunning} and later calls return a mocked {@code DAGClient}.
 */
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
  // File-system related mocks: every path resolves to the same mocked file system.
  utils = mock(DagUtils.class);
  fs = mock(FileSystem.class);
  path = mock(Path.class);
  when(path.getFileSystem(any(Configuration.class))).thenReturn(fs);
  when(utils.getTezDir(any(Path.class))).thenReturn(path);

  // createVertex returns a real Vertex named after the BaseWork passed as second argument.
  when(utils.createVertex(any(JobConf.class), any(BaseWork.class), any(Path.class),
      any(LocalResource.class), any(List.class), any(FileSystem.class), any(Context.class),
      anyBoolean(), any(TezWork.class), any(VertexType.class)))
      .thenAnswer(new Answer<Vertex>() {
        @Override
        public Vertex answer(InvocationOnMock invocation) throws Throwable {
          BaseWork vertexWork = (BaseWork) invocation.getArguments()[1];
          return Vertex.create(vertexWork.getName(), mock(ProcessorDescriptor.class), 0,
              mock(Resource.class));
        }
      });

  // createEdge returns a real Edge between the two vertices it was called with.
  when(utils.createEdge(any(JobConf.class), any(Vertex.class), any(Vertex.class),
      any(TezEdgeProperty.class), any(VertexType.class)))
      .thenAnswer(new Answer<Edge>() {
        @Override
        public Edge answer(InvocationOnMock invocation) throws Throwable {
          Object[] callArgs = invocation.getArguments();
          Vertex source = (Vertex) callArgs[1];
          Vertex target = (Vertex) callArgs[2];
          return Edge.create(source, target, mock(EdgeProperty.class));
        }
      });

  // Two map works and two reduce works, named "Work 1".."Work N" in getAllWork() order.
  work = new TezWork("", null);
  mws = new MapWork[] { new MapWork(), new MapWork() };
  rws = new ReduceWork[] { new ReduceWork(), new ReduceWork() };
  work.addAll(mws);
  work.addAll(rws);
  List<BaseWork> allWork = work.getAllWork();
  for (int idx = 0; idx < allWork.size(); idx++) {
    allWork.get(idx).setName("Work " + (idx + 1));
  }

  // Every work shares the same mocked operator; map works also share alias/path mappings.
  op = mock(Operator.class);
  LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork = new LinkedHashMap<>();
  aliasToWork.put("foo", op);
  ArrayList<String> aliases = new ArrayList<>();
  aliases.add("foo");
  LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
  pathToAliases.put(new Path("foo"), aliases);
  for (MapWork mapWork : mws) {
    mapWork.setAliasToWork(aliasToWork);
    mapWork.setPathToAliases(pathToAliases);
  }
  for (ReduceWork reduceWork : rws) {
    reduceWork.setReducer(op);
  }

  // Graph shape: mws[0] -> rws[0], mws[1] -> rws[0], rws[0] -> rws[1].
  TezEdgeProperty simpleEdge = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
  work.connect(mws[0], rws[0], simpleEdge);
  work.connect(mws[1], rws[0], simpleEdge);
  work.connect(rws[0], rws[1], simpleEdge);

  // Task under test, with a mocked console and a query plan carrying a random query id.
  task = new TezTask(utils);
  task.setWork(work);
  task.setConsole(mock(LogHelper.class));
  QueryPlan queryPlan = mock(QueryPlan.class);
  String queryId = UUID.randomUUID().toString();
  doReturn(queryId).when(queryPlan).getQueryId();
  task.setQueryPlan(queryPlan);

  conf = new JobConf();
  appLr = mock(LocalResource.class);

  // Start a session state configured with the SQL standard authorizer factory.
  HiveConf hiveConf = new HiveConf();
  hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
      "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
  SessionState.start(hiveConf);

  // First submitDAG fails with SessionNotRunning, subsequent calls succeed.
  session = mock(TezClient.class);
  sessionState = mock(TezSessionState.class);
  when(sessionState.getSession()).thenReturn(session);
  DAGClient dagClient = mock(DAGClient.class);
  when(session.submitDAG(any(DAG.class)))
      .thenThrow(new SessionNotRunning(""))
      .thenReturn(dagClient);
}
Use of org.apache.hadoop.hive.ql.plan.BaseWork in the Apache Hive project.
From the class TestGenTezWork, method testCreateReduce.
/**
 * Checks the TezWork produced after the processor has handled {@code rs} and then
 * {@code fs}: it must contain two works, the second being a named ReduceWork whose
 * parent is the first work and whose reducer is {@code fs}, and {@code fs} must no
 * longer have any parent operators.
 *
 * @throws SemanticException if processing either operator fails
 */
@Test
public void testCreateReduce() throws SemanticException {
  // create map
  proc.process(rs, null, ctx, (Object[]) null);
  // create reduce
  proc.process(fs, null, ctx, (Object[]) null);

  TezWork work = ctx.currentTask.getWork();
  // JUnit expects (expected, actual); keeping that order makes failure messages read correctly
  assertEquals(2, work.getAllWork().size());

  BaseWork w = work.getAllWork().get(1);
  assertTrue(w instanceof ReduceWork);
  assertTrue(work.getParents(w).contains(work.getAllWork().get(0)));

  ReduceWork rw = (ReduceWork) w;
  // need to make sure names are set for tez to connect things right
  assertNotNull(w.getName());
  // reduce work should start with our fs op
  assertSame(fs, rw.getReducer());
  // should have severed the ties
  assertEquals(0, fs.getParentOperators().size());
}
Use of org.apache.hadoop.hive.ql.plan.BaseWork in the Apache Hive project.
From the class LocalHiveSparkClient, method refreshLocalResources.
/**
 * Registers the jars, files and archives needed by {@code sparkWork} through
 * {@code addJars} / {@code addResources}, and records the added jars, files and
 * archives in {@code conf}.
 *
 * At this point single SparkContext is used by more than one thread, so make this
 * method synchronized.
 *
 * This method can't remove a jar/resource from SparkContext. Looks like this is an
 * issue we have to live with until multiple SparkContexts are supported in a single JVM.
 *
 * @param sparkWork the work whose BaseWork entries contribute plugin module jars
 * @param conf the Hive configuration to read resources from; its HIVEADDEDJARS,
 *          HIVEADDEDFILES and HIVEADDEDARCHIVES variables are overwritten
 */
private synchronized void refreshLocalResources(SparkWork sparkWork, HiveConf conf) {
  // add hive-exec jar
  addJars((new JobConf(this.getClass())).getJar());

  // add aux jars
  addJars(conf.getAuxJars());
  SessionState sessionState = SessionState.get();
  addJars(sessionState == null ? null : sessionState.getReloadableAuxJars());

  // add added jars
  String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDJARS, addedJars);
  addJars(addedJars);

  // add plugin module jars on demand
  // jobConf will hold all the configuration for hadoop, tez, and hive
  JobConf jobConf = new JobConf(conf);
  jobConf.set(MR_JAR_PROPERTY, "");
  for (BaseWork work : sparkWork.getAllWork()) {
    work.configureJobConf(jobConf);
  }
  // jobConf is a copy of conf, so whatever the works registered under MR_JAR_PROPERTY
  // is only visible in jobConf; reading it from conf would miss those jars.
  addJars(jobConf.get(MR_JAR_PROPERTY));

  // add added files
  String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDFILES, addedFiles);
  addResources(addedFiles);

  // add added archives
  String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE);
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDARCHIVES, addedArchives);
  addResources(addedArchives);
}
Use of org.apache.hadoop.hive.ql.plan.BaseWork in the Apache Hive project.
From the class RemoteHiveSparkClient, method refreshLocalResources.
/**
 * Registers the jars, files and archives needed by {@code sparkWork} through
 * {@code addJars} / {@code addResources}, and records the added jars, files and
 * archives in {@code conf}.
 *
 * @param sparkWork the work whose BaseWork entries contribute plugin module jars
 * @param conf the Hive configuration to read resources from; its HIVEADDEDJARS,
 *          HIVEADDEDFILES and HIVEADDEDARCHIVES variables are overwritten
 * @throws IOException declared by this method; presumably raised by {@code addJars} /
 *           {@code addResources} (not visible here)
 */
private void refreshLocalResources(SparkWork sparkWork, HiveConf conf) throws IOException {
  // add hive-exec jar
  addJars((new JobConf(this.getClass())).getJar());

  // add aux jars
  addJars(conf.getAuxJars());
  SessionState sessionState = SessionState.get();
  addJars(sessionState == null ? null : sessionState.getReloadableAuxJars());

  // add added jars
  String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDJARS, addedJars);
  addJars(addedJars);

  // add plugin module jars on demand
  // jobConf will hold all the configuration for hadoop, tez, and hive
  JobConf jobConf = new JobConf(conf);
  jobConf.set(MR_JAR_PROPERTY, "");
  for (BaseWork work : sparkWork.getAllWork()) {
    work.configureJobConf(jobConf);
  }
  // jobConf is a copy of conf, so whatever the works registered under MR_JAR_PROPERTY
  // is only visible in jobConf; reading it from conf would miss those jars.
  addJars(jobConf.get(MR_JAR_PROPERTY));

  // add added files
  String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDFILES, addedFiles);
  addResources(addedFiles);

  // add added archives
  String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE);
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDARCHIVES, addedArchives);
  addResources(addedArchives);
}
Use of org.apache.hadoop.hive.ql.plan.BaseWork in the Apache Hive project.
From the class Utilities, method getBaseWork.
/**
 * Returns the Map or Reduce plan stored under {@code name}, deserializing it on first
 * use and serving it from the gWorkMap afterwards.
 * Side effect: the BaseWork returned is also placed in the gWorkMap.
 *
 * <p>The plan is read either from a Base64-encoded, deflated string held in {@code conf}
 * (when HIVE_RPC_QUERY_PLAN is enabled) or from the plan file on the file system.
 * The concrete work class is chosen from {@code name} together with the configured
 * mapper/reducer class.
 *
 * @param conf configuration used to resolve the plan path, the execution engine and the
 *          mapper/reducer class
 * @param name plan name; the original contract states that a null name yields null
 *          (this relies on {@code getPlanPath} returning null, which is not visible here)
 * @return the BaseWork for {@code name}, or null when no plan path can be resolved, the
 *         RPC plan string is missing from {@code conf}, or the plan file does not exist
 * @throws RuntimeException if the configuration files are not proper, the plan name is
 *           not a recognized work type, or the plan can not be loaded
 */
private static BaseWork getBaseWork(Configuration conf, String name) {
  Path path = null;
  InputStream in = null;
  Kryo kryo = SerializationUtilities.borrowKryo();
  try {
    String engine = HiveConf.getVar(conf, ConfVars.HIVE_EXECUTION_ENGINE);
    if (engine.equals("spark")) {
      // TODO Add jar into current thread context classloader as it may be invoked by Spark driver inside
      // threads, should be unnecessary while SPARK-5377 is resolved.
      String addedJars = conf.get(HIVE_ADDED_JARS);
      if (addedJars != null && !addedJars.isEmpty()) {
        ClassLoader loader = Thread.currentThread().getContextClassLoader();
        ClassLoader newLoader = addToClassPath(loader, addedJars.split(";"));
        Thread.currentThread().setContextClassLoader(newLoader);
        kryo.setClassLoader(newLoader);
      }
    }

    path = getPlanPath(conf, name);
    LOG.info("PLAN PATH = " + path);
    if (path == null) {
      // Map/reduce plan may not be generated
      return null;
    }

    BaseWork gWork = gWorkMap.get(conf).get(path);
    if (gWork == null) {
      LOG.debug("local path = {}", path);
      final long serializedSize;
      final String planMode;
      if (HiveConf.getBoolVar(conf, ConfVars.HIVE_RPC_QUERY_PLAN)) {
        // The plan travels inside the configuration, keyed by the path component of the plan URI.
        String planKey = path.toUri().getPath();
        LOG.debug("Loading plan from string: {}", planKey);
        String planString = conf.getRaw(planKey);
        if (planString == null) {
          LOG.info("Could not find plan string in conf");
          return null;
        }
        serializedSize = planString.length();
        planMode = "RPC";
        byte[] planBytes = Base64.decodeBase64(planString);
        in = new ByteArrayInputStream(planBytes);
        in = new InflaterInputStream(in);
      } else {
        LOG.debug("Open file to read in plan: {}", path);
        FileSystem fs = path.getFileSystem(conf);
        in = fs.open(path);
        serializedSize = fs.getFileStatus(path).getLen();
        planMode = "FILE";
      }

      if (MAP_PLAN_NAME.equals(name)) {
        // The configured mapper class decides which MapWork flavour was serialized.
        String mapperClass = conf.get(MAPRED_MAPPER_CLASS);
        if (ExecMapper.class.getName().equals(mapperClass)) {
          gWork = SerializationUtilities.deserializePlan(kryo, in, MapWork.class);
        } else if (MergeFileMapper.class.getName().equals(mapperClass)) {
          gWork = SerializationUtilities.deserializePlan(kryo, in, MergeFileWork.class);
        } else if (ColumnTruncateMapper.class.getName().equals(mapperClass)) {
          gWork = SerializationUtilities.deserializePlan(kryo, in, ColumnTruncateWork.class);
        } else if (PartialScanMapper.class.getName().equals(mapperClass)) {
          gWork = SerializationUtilities.deserializePlan(kryo, in, PartialScanWork.class);
        } else {
          throw new RuntimeException("unable to determine work from configuration ." + MAPRED_MAPPER_CLASS + " was " + mapperClass);
        }
      } else if (REDUCE_PLAN_NAME.equals(name)) {
        String reducerClass = conf.get(MAPRED_REDUCER_CLASS);
        if (ExecReducer.class.getName().equals(reducerClass)) {
          gWork = SerializationUtilities.deserializePlan(kryo, in, ReduceWork.class);
        } else {
          throw new RuntimeException("unable to determine work from configuration ." + MAPRED_REDUCER_CLASS + " was " + reducerClass);
        }
      } else if (name.contains(MERGE_PLAN_NAME)) {
        if (name.startsWith(MAPNAME)) {
          gWork = SerializationUtilities.deserializePlan(kryo, in, MapWork.class);
        } else if (name.startsWith(REDUCENAME)) {
          gWork = SerializationUtilities.deserializePlan(kryo, in, ReduceWork.class);
        } else {
          throw new RuntimeException("Unknown work type: " + name);
        }
      } else {
        // Without this branch gWork would stay null and the log statement below would
        // fail with an uninformative NullPointerException.
        throw new RuntimeException("Unknown work type: " + name);
      }

      LOG.info("Deserialized plan (via {}) - name: {} size: {}", planMode, gWork.getName(), humanReadableByteCount(serializedSize));
      gWorkMap.get(conf).put(path, gWork);
    } else if (LOG.isDebugEnabled()) {
      LOG.debug("Found plan in cache for name: " + name);
    }
    return gWork;
  } catch (FileNotFoundException fnf) {
    // happens. e.g.: no reduce work.
    LOG.debug("No plan file found: " + path + "; " + fnf.getMessage());
    return null;
  } catch (Exception e) {
    String msg = "Failed to load plan: " + path;
    LOG.error(msg, e);
    throw new RuntimeException(msg, e);
  } finally {
    SerializationUtilities.releaseKryo(kryo);
    if (in != null) {
      try {
        in.close();
      } catch (IOException ignored) {
        // best-effort close: the plan has already been read (or loading already failed)
      }
    }
  }
}
Aggregations