Use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.
The class TestTezTask, method setUp.
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
    utils = mock(DagUtils.class);
    fs = mock(FileSystem.class);
    path = mock(Path.class);
    when(path.getFileSystem(any(Configuration.class))).thenReturn(fs);
    when(utils.getTezDir(any(Path.class))).thenReturn(path);
    // Have the mocked DagUtils build a real (but minimal) Vertex for each BaseWork.
    when(utils.createVertex(any(JobConf.class), any(BaseWork.class), any(Path.class),
        any(FileSystem.class), any(Context.class), anyBoolean(), any(TezWork.class),
        any(VertexType.class), any(Map.class))).thenAnswer(new Answer<Vertex>() {

        @Override
        public Vertex answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            return Vertex.create(((BaseWork) args[1]).getName(),
                mock(ProcessorDescriptor.class), 0, mock(Resource.class));
        }
    });
    when(utils.createEdge(any(JobConf.class), any(Vertex.class), any(Vertex.class),
        any(TezEdgeProperty.class), any(BaseWork.class), any(TezWork.class)))
        .thenAnswer(new Answer<Edge>() {

            @Override
            public Edge answer(InvocationOnMock invocation) throws Throwable {
                Object[] args = invocation.getArguments();
                return Edge.create((Vertex) args[1], (Vertex) args[2], mock(EdgeProperty.class));
            }
        });
    // Build a small TezWork graph: two map works feeding a chain of two reduce works.
    work = new TezWork("", null);
    mws = new MapWork[] { new MapWork(), new MapWork() };
    rws = new ReduceWork[] { new ReduceWork(), new ReduceWork() };
    work.addAll(mws);
    work.addAll(rws);
    int i = 0;
    for (BaseWork w : work.getAllWork()) {
        w.setName("Work " + (++i));
    }
    op = mock(Operator.class);
    LinkedHashMap<String, Operator<? extends OperatorDesc>> map =
        new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
    map.put("foo", op);
    mws[0].setAliasToWork(map);
    mws[1].setAliasToWork(map);
    LinkedHashMap<Path, ArrayList<String>> pathMap = new LinkedHashMap<>();
    ArrayList<String> aliasList = new ArrayList<String>();
    aliasList.add("foo");
    pathMap.put(new Path("foo"), aliasList);
    mws[0].setPathToAliases(pathMap);
    mws[1].setPathToAliases(pathMap);
    rws[0].setReducer(op);
    rws[1].setReducer(op);
    TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
    work.connect(mws[0], rws[0], edgeProp);
    work.connect(mws[1], rws[0], edgeProp);
    work.connect(rws[0], rws[1], edgeProp);
    task = new TezTask(utils);
    task.setWork(work);
    task.setConsole(mock(LogHelper.class));
    QueryPlan mockQueryPlan = mock(QueryPlan.class);
    doReturn(UUID.randomUUID().toString()).when(mockQueryPlan).getQueryId();
    task.setQueryPlan(mockQueryPlan);
    conf = new JobConf();
    appLr = createResource("foo.jar");
    HiveConf hiveConf = new HiveConf();
    hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
        "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
    SessionState.start(hiveConf);
    session = mock(TezClient.class);
    sessionState = mock(TezSessionState.class);
    when(sessionState.getSession()).thenReturn(session);
    when(sessionState.reopen()).thenReturn(sessionState);
    // First submission fails with SessionNotRunning, forcing a reopen; the retry succeeds.
    when(session.submitDAG(any(DAG.class)))
        .thenThrow(new SessionNotRunning(""))
        .thenReturn(mock(DAGClient.class));
}
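The last stub is what drives the scenario under test: the first submitDAG call throws SessionNotRunning, so the task must reopen the session and resubmit, and the second call returns a DAGClient. Below is a minimal, self-contained sketch of that consecutive-stubbing pattern in Mockito; FakeClient and the exception type are illustrative stand-ins, not Hive or Tez classes.

import static org.mockito.Mockito.*;

public class RetryStubSketch {

    // Hypothetical stand-in for TezClient.
    interface FakeClient {
        String submitDAG(String dag);
    }

    public static void main(String[] args) {
        FakeClient client = mock(FakeClient.class);
        // Consecutive stubbing: the first call throws, every later call returns normally.
        when(client.submitDAG(anyString()))
            .thenThrow(new IllegalStateException("session not running")) // stands in for SessionNotRunning
            .thenReturn("dagClient");
        try {
            client.submitDAG("d");
        } catch (IllegalStateException expected) {
            // the caller is expected to reopen the session and retry here
        }
        System.out.println(client.submitDAG("d")); // prints "dagClient"
    }
}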
Use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.
The class LocalHiveSparkClient, method refreshLocalResources.
/**
 * At this point a single SparkContext is used by more than one thread, so this
 * method must be synchronized.
 *
 * This method can't remove a jar/resource from the SparkContext. This appears to be
 * an issue we have to live with until multiple SparkContexts are supported in a single JVM.
 */
private synchronized void refreshLocalResources(SparkWork sparkWork, HiveConf conf) {
    // add hive-exec jar
    addJars((new JobConf(this.getClass())).getJar());
    // add aux jars
    addJars(conf.getAuxJars());
    addJars(SessionState.get() == null ? null : SessionState.get().getReloadableAuxJars());
    // add added jars
    String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDJARS, addedJars);
    addJars(addedJars);
    // add plugin module jars on demand
    // jobConf will hold all the configuration for hadoop, tez, and hive
    JobConf jobConf = new JobConf(conf);
    jobConf.set(MR_JAR_PROPERTY, "");
    for (BaseWork work : sparkWork.getAllWork()) {
        work.configureJobConf(jobConf);
    }
    addJars(jobConf.get(MR_JAR_PROPERTY));
    // add added files
    String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDFILES, addedFiles);
    addResources(addedFiles);
    // add added archives
    String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDARCHIVES, addedArchives);
    addResources(addedArchives);
}
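The "plugin module jars" block is an accumulate-then-read pattern: clear a jar-list key, let every BaseWork append what it needs through configureJobConf, then hand the combined value to addJars once. A minimal sketch of that pattern follows, assuming a comma-separated "tmpjars"-style key; the actual constant behind MR_JAR_PROPERTY is not shown in this snippet, so the key name here is an assumption.

import org.apache.hadoop.conf.Configuration;

public class JarAccumulationSketch {

    // Hypothetical stand-in for BaseWork.configureJobConf(JobConf).
    interface Work {
        void configureJobConf(Configuration conf, String key);
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        String key = "tmpjars"; // assumption: MR_JAR_PROPERTY names a comma-separated jar list
        conf.set(key, "");      // clear before the loop, exactly as above
        Work w1 = (c, k) -> c.set(k, join(c.get(k), "file:///plugins/a.jar"));
        Work w2 = (c, k) -> c.set(k, join(c.get(k), "file:///plugins/b.jar"));
        for (Work w : new Work[] { w1, w2 }) {
            w.configureJobConf(conf, key);
        }
        // Read the accumulated list once: file:///plugins/a.jar,file:///plugins/b.jar
        System.out.println(conf.get(key));
    }

    private static String join(String base, String extra) {
        return (base == null || base.isEmpty()) ? extra : base + "," + extra;
    }
}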
Use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.
The class RemoteHiveSparkClient, method refreshLocalResources.
private void refreshLocalResources(SparkWork sparkWork, HiveConf conf) throws IOException {
    // add hive-exec jar
    addJars((new JobConf(this.getClass())).getJar());
    // add aux jars
    addJars(conf.getAuxJars());
    addJars(SessionState.get() == null ? null : SessionState.get().getReloadableAuxJars());
    // add added jars
    String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDJARS, addedJars);
    addJars(addedJars);
    // add plugin module jars on demand
    // jobConf will hold all the configuration for hadoop, tez, and hive
    JobConf jobConf = new JobConf(conf);
    jobConf.set(MR_JAR_PROPERTY, "");
    for (BaseWork work : sparkWork.getAllWork()) {
        work.configureJobConf(jobConf);
    }
    addJars(jobConf.get(MR_JAR_PROPERTY));
    // remove the location of container tokens
    conf.unset(MR_CREDENTIALS_LOCATION_PROPERTY);
    // add added files
    String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDFILES, addedFiles);
    addResources(addedFiles);
    // add added archives
    String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDARCHIVES, addedArchives);
    addResources(addedArchives);
}
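This is nearly identical to the local variant above; the notable difference is the conf.unset call, which reads as a guard against a client-local container-token path leaking into configuration shipped to a remote process. A tiny sketch of the unset behavior, with an assumed property name and path (the constant behind MR_CREDENTIALS_LOCATION_PROPERTY is not shown in this snippet):

import org.apache.hadoop.conf.Configuration;

public class UnsetSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        // Hypothetical local-only value that would be meaningless on a remote host.
        conf.set("mapreduce.job.credentials.binary", "/tmp/container_tokens");
        conf.unset("mapreduce.job.credentials.binary");
        System.out.println(conf.get("mapreduce.job.credentials.binary")); // null
    }
}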
Use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.
The class SparkPlanGenerator, method generate.
public SparkPlan generate(SparkWork sparkWork) throws Exception {
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
    SparkPlan sparkPlan = new SparkPlan(this.sc.sc());
    cloneToWork = sparkWork.getCloneToWork();
    workToTranMap.clear();
    workToParentWorkTranMap.clear();
    try {
        for (BaseWork work : sparkWork.getAllWork()) {
            perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
            SparkTran tran = generate(work, sparkWork);
            SparkTran parentTran = generateParentTran(sparkPlan, sparkWork, work);
            sparkPlan.addTran(tran);
            sparkPlan.connect(parentTran, tran);
            workToTranMap.put(work, tran);
            perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
        }
    } finally {
        // Clear all ThreadLocal-cached MapWork/ReduceWork after plan generation,
        // as this may be executed in a pool thread.
        Utilities.clearWorkMap(jobConf);
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
    return sparkPlan;
}
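The finally block matters because, as its comment notes, plan generation may run on a pooled thread: per-thread caches survive across tasks unless they are cleared explicitly. A small sketch of that cleanup discipline with a plain ThreadLocal (the names are illustrative, not Hive's Utilities API):

import java.util.HashMap;
import java.util.Map;

public class ThreadLocalCleanupSketch {

    private static final ThreadLocal<Map<String, Object>> WORK_MAP =
        ThreadLocal.withInitial(HashMap::new);

    static Object buildPlan(String key) {
        try {
            // Cache per-thread state while the plan is being generated.
            WORK_MAP.get().put(key, new Object());
            return WORK_MAP.get().get(key);
        } finally {
            // On a pooled thread this is mandatory: without remove(), the next
            // task that borrows this thread would observe stale cached state.
            WORK_MAP.remove();
        }
    }

    public static void main(String[] args) {
        System.out.println(buildPlan("map1") != null); // true
    }
}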
Use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.
The class DagUtils, method createEdgeProperty.
/*
 * Helper function to create an edge property from an edge type.
 */
private EdgeProperty createEdgeProperty(Vertex w, TezEdgeProperty edgeProp, Configuration conf,
        BaseWork work, TezWork tezWork) throws IOException {
    MRHelpers.translateMRConfToTez(conf);
    String keyClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS);
    String valClass = conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS);
    String partitionerClassName = conf.get("mapred.partitioner.class");
    Map<String, String> partitionerConf;
    EdgeType edgeType = edgeProp.getEdgeType();
    switch (edgeType) {
    case BROADCAST_EDGE:
        UnorderedKVEdgeConfig et1Conf = UnorderedKVEdgeConfig.newBuilder(keyClass, valClass)
            .setFromConfiguration(conf)
            .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null)
            .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
            .build();
        return et1Conf.createDefaultBroadcastEdgeProperty();
    case CUSTOM_EDGE:
        assert partitionerClassName != null;
        partitionerConf = createPartitionerConf(partitionerClassName, conf);
        UnorderedPartitionedKVEdgeConfig et2Conf = UnorderedPartitionedKVEdgeConfig
            .newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf)
            .setFromConfiguration(conf)
            .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null)
            .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
            .build();
        EdgeManagerPluginDescriptor edgeDesc =
            EdgeManagerPluginDescriptor.create(CustomPartitionEdge.class.getName());
        CustomEdgeConfiguration edgeConf =
            new CustomEdgeConfiguration(edgeProp.getNumBuckets(), null);
        DataOutputBuffer dob = new DataOutputBuffer();
        edgeConf.write(dob);
        byte[] userPayload = dob.getData();
        edgeDesc.setUserPayload(UserPayload.create(ByteBuffer.wrap(userPayload)));
        return et2Conf.createDefaultCustomEdgeProperty(edgeDesc);
    case CUSTOM_SIMPLE_EDGE:
        assert partitionerClassName != null;
        partitionerConf = createPartitionerConf(partitionerClassName, conf);
        UnorderedPartitionedKVEdgeConfig et3Conf = UnorderedPartitionedKVEdgeConfig
            .newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf)
            .setFromConfiguration(conf)
            .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null)
            .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
            .build();
        return et3Conf.createDefaultEdgeProperty();
    case ONE_TO_ONE_EDGE:
        UnorderedKVEdgeConfig et4Conf = UnorderedKVEdgeConfig.newBuilder(keyClass, valClass)
            .setFromConfiguration(conf)
            .setKeySerializationClass(TezBytesWritableSerialization.class.getName(), null)
            .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
            .build();
        return et4Conf.createDefaultOneToOneEdgeProperty();
    case XPROD_EDGE:
        EdgeManagerPluginDescriptor edgeManagerDescriptor =
            EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
        List<String> crossProductSources = new ArrayList<>();
        for (BaseWork parentWork : tezWork.getParents(work)) {
            if (EdgeType.XPROD_EDGE == tezWork.getEdgeType(parentWork, work)) {
                crossProductSources.add(parentWork.getName());
            }
        }
        CartesianProductConfig cpConfig = new CartesianProductConfig(crossProductSources);
        edgeManagerDescriptor.setUserPayload(cpConfig.toUserPayload(new TezConfiguration(conf)));
        UnorderedPartitionedKVEdgeConfig cpEdgeConf = UnorderedPartitionedKVEdgeConfig
            .newBuilder(keyClass, valClass, ValueHashPartitioner.class.getName())
            .build();
        return cpEdgeConf.createDefaultCustomEdgeProperty(edgeManagerDescriptor);
    case SIMPLE_EDGE:
        // fall through
    default:
        assert partitionerClassName != null;
        partitionerConf = createPartitionerConf(partitionerClassName, conf);
        OrderedPartitionedKVEdgeConfig et5Conf = OrderedPartitionedKVEdgeConfig
            .newBuilder(keyClass, valClass, MRPartitioner.class.getName(), partitionerConf)
            .setFromConfiguration(conf)
            .setKeySerializationClass(TezBytesWritableSerialization.class.getName(),
                TezBytesComparator.class.getName(), null)
            .setValueSerializationClass(TezBytesWritableSerialization.class.getName(), null)
            .build();
        return et5Conf.createDefaultEdgeProperty();
    }
}
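The CUSTOM_EDGE branch shows the usual way to attach a Writable to a Tez edge: serialize it into a DataOutputBuffer and wrap the bytes as a UserPayload. A minimal sketch of that conversion, using IntWritable as a stand-in payload; note that getData() returns the whole backing array, so wrapping only getLength() bytes is the defensive form of the same idea.

import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;
import org.apache.tez.dag.api.UserPayload;

public class PayloadSketch {

    public static UserPayload toUserPayload(IntWritable w) throws IOException {
        DataOutputBuffer dob = new DataOutputBuffer();
        w.write(dob); // serialize the Writable into the buffer
        // Wrap only the valid bytes; getData() hands back the full backing array.
        return UserPayload.create(ByteBuffer.wrap(dob.getData(), 0, dob.getLength()));
    }

    public static void main(String[] args) throws IOException {
        System.out.println(toUserPayload(new IntWritable(4)).getPayload().remaining()); // 4
    }
}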