Use of org.apache.hadoop.mapred.InputSplit in project asterixdb by apache.
The class SchedulerTest, method testSchedulerSimple:
/**
 * Test the scheduler for the case where the Hyracks cluster and the HDFS cluster
 * run on the same set of machines
 *
 * @throws Exception
 */
public void testSchedulerSimple() throws Exception {
    Map<String, NodeControllerInfo> ncNameToNcInfos =
            TestUtils.generateNodeControllerInfo(6, "nc", "10.0.0.", 5099, 5098, 5097);
    InputSplit[] fileSplits = new InputSplit[6];
    fileSplits[0] = new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" });
    fileSplits[1] = new FileSplit(new Path("part-2"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" });
    fileSplits[2] = new FileSplit(new Path("part-3"), 0, 0, new String[] { "10.0.0.4", "10.0.0.5", "10.0.0.6" });
    fileSplits[3] = new FileSplit(new Path("part-4"), 0, 0, new String[] { "10.0.0.2", "10.0.0.1", "10.0.0.6" });
    fileSplits[4] = new FileSplit(new Path("part-5"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" });
    fileSplits[5] = new FileSplit(new Path("part-6"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" });
    String[] expectedResults = new String[] { "nc1", "nc4", "nc6", "nc2", "nc3", "nc5" };
    Scheduler scheduler = new Scheduler(ncNameToNcInfos);
    String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
    for (int i = 0; i < locationConstraints.length; i++) {
        Assert.assertEquals(locationConstraints[i], expectedResults[i]);
    }
    ClusterTopology topology = parseTopology();
    scheduler = new Scheduler(ncNameToNcInfos, topology);
    locationConstraints = scheduler.getLocationConstraints(fileSplits);
    for (int i = 0; i < locationConstraints.length; i++) {
        Assert.assertEquals(locationConstraints[i], expectedResults[i]);
    }
}
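TestUtils.generateNodeControllerInfo is a test helper that is not shown in this listing. Below is a minimal sketch of what it presumably builds, assuming it registers NCs nc1..nc6 on IPs 10.0.0.1..10.0.0.6 with the three given ports; the constructor pattern and the trailing 2 mirror the explicit NodeControllerInfo constructions in the testSchedulerLargerHDFS example further down, and the standard Hyracks API packages are assumed:

import java.util.HashMap;
import java.util.Map;

import org.apache.hyracks.api.client.NodeControllerInfo;
import org.apache.hyracks.api.client.NodeStatus;
import org.apache.hyracks.api.comm.NetworkAddress;

// Hypothetical stand-in for TestUtils.generateNodeControllerInfo(6, "nc", "10.0.0.", 5099, 5098, 5097):
// register numberOfNCs node controllers named <prefix>1..<prefix>N on IPs <ipPrefix>1..<ipPrefix>N.
public static Map<String, NodeControllerInfo> generateNodeControllerInfo(int numberOfNCs, String ncNamePrefix,
        String ipPrefix, int dataPort, int resultPort, int messagingPort) {
    Map<String, NodeControllerInfo> ncNameToNcInfos = new HashMap<>();
    for (int i = 1; i <= numberOfNCs; i++) {
        String ncName = ncNamePrefix + i;
        String ipAddress = ipPrefix + i;
        ncNameToNcInfos.put(ncName, new NodeControllerInfo(ncName, NodeStatus.ALIVE,
                new NetworkAddress(ipAddress, dataPort), new NetworkAddress(ipAddress, resultPort),
                new NetworkAddress(ipAddress, messagingPort), 2));
    }
    return ncNameToNcInfos;
}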
Use of org.apache.hadoop.mapred.InputSplit in project asterixdb by apache.
The class SchedulerTest, method testSchedulerLargerHDFS:
/**
 * Test the case where the HDFS cluster is larger than the Hyracks cluster
 *
 * @throws Exception
 */
public void testSchedulerLargerHDFS() throws Exception {
    int dataPort = 5099;
    int resultPort = 5098;
    int messagingPort = 5097;
    Map<String, NodeControllerInfo> ncNameToNcInfos =
            TestUtils.generateNodeControllerInfo(4, "nc", "10.0.0.", dataPort, resultPort, messagingPort);
    ncNameToNcInfos.put("nc7", new NodeControllerInfo("nc7", NodeStatus.ALIVE,
            new NetworkAddress("10.0.0.7", dataPort), new NetworkAddress("10.0.0.5", resultPort),
            new NetworkAddress("10.0.0.5", messagingPort), 2));
    ncNameToNcInfos.put("nc12", new NodeControllerInfo("nc12", NodeStatus.ALIVE,
            new NetworkAddress("10.0.0.12", dataPort), new NetworkAddress("10.0.0.5", resultPort),
            new NetworkAddress("10.0.0.5", messagingPort), 2));
    InputSplit[] fileSplits = new InputSplit[12];
    fileSplits[0] = new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" });
    fileSplits[1] = new FileSplit(new Path("part-2"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" });
    fileSplits[2] = new FileSplit(new Path("part-3"), 0, 0, new String[] { "10.0.0.4", "10.0.0.5", "10.0.0.6" });
    fileSplits[3] = new FileSplit(new Path("part-4"), 0, 0, new String[] { "10.0.0.2", "10.0.0.1", "10.0.0.6" });
    fileSplits[4] = new FileSplit(new Path("part-5"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" });
    fileSplits[5] = new FileSplit(new Path("part-6"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" });
    fileSplits[6] = new FileSplit(new Path("part-7"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" });
    fileSplits[7] = new FileSplit(new Path("part-8"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" });
    fileSplits[8] = new FileSplit(new Path("part-12"), 0, 0, new String[] { "10.0.0.14", "10.0.0.11", "10.0.0.13" });
    fileSplits[9] = new FileSplit(new Path("part-10"), 0, 0, new String[] { "10.0.0.2", "10.0.0.1", "10.0.0.6" });
    fileSplits[10] = new FileSplit(new Path("part-11"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.7" });
    fileSplits[11] = new FileSplit(new Path("part-9"), 0, 0, new String[] { "10.0.0.4", "10.0.0.5", "10.0.0.6" });
    Scheduler scheduler = new Scheduler(ncNameToNcInfos);
    String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
    String[] expectedResults = new String[] { "nc1", "nc4", "nc4", "nc1", "nc3", "nc2", "nc2", "nc3", "nc12",
            "nc7", "nc7", "nc12" };
    for (int i = 0; i < locationConstraints.length; i++) {
        Assert.assertEquals(locationConstraints[i], expectedResults[i]);
    }
    expectedResults = new String[] { "nc1", "nc4", "nc4", "nc1", "nc3", "nc2", "nc2", "nc3", "nc7", "nc12",
            "nc7", "nc12" };
    ClusterTopology topology = parseTopology();
    scheduler = new Scheduler(ncNameToNcInfos, topology);
    locationConstraints = scheduler.getLocationConstraints(fileSplits);
    for (int i = 0; i < locationConstraints.length; i++) {
        Assert.assertEquals(locationConstraints[i], expectedResults[i]);
    }
}
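The expected results encode the scheduler's load balancing: the twelve splits are spread evenly, two per NC, across nc1 through nc4 plus the manually registered nc7 and nc12, even though splits like part-12 reference datanode IPs (10.0.0.11 to 10.0.0.14) that match no NC exactly. An illustrative check, not part of the original test, that could be appended inside the method after the assertions above:

// Illustrative only: every NC should have received exactly 2 of the 12 splits.
Map<String, Integer> splitsPerNc = new HashMap<>();
for (String nc : locationConstraints) {
    splitsPerNc.merge(nc, 1, Integer::sum);
}
Assert.assertEquals(6, splitsPerNc.size());
for (int assigned : splitsPerNc.values()) {
    Assert.assertEquals(2, assigned);
}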
Use of org.apache.hadoop.mapred.InputSplit in project asterixdb by apache.
The class SchedulerTest, method testSchedulerSmallerHDFSOdd:
/**
 * Test the case where the HDFS cluster is smaller than the Hyracks cluster,
 * with an odd number of splits
 *
 * @throws Exception
 */
public void testSchedulerSmallerHDFSOdd() throws Exception {
    Map<String, NodeControllerInfo> ncNameToNcInfos =
            TestUtils.generateNodeControllerInfo(6, "nc", "10.0.0.", 5099, 5098, 5097);
    InputSplit[] fileSplits = new InputSplit[13];
    fileSplits[0] = new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" });
    fileSplits[1] = new FileSplit(new Path("part-2"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" });
    fileSplits[2] = new FileSplit(new Path("part-3"), 0, 0, new String[] { "10.0.0.4", "10.0.0.5", "10.0.0.3" });
    fileSplits[3] = new FileSplit(new Path("part-4"), 0, 0, new String[] { "10.0.0.2", "10.0.0.1", "10.0.0.3" });
    fileSplits[4] = new FileSplit(new Path("part-5"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" });
    fileSplits[5] = new FileSplit(new Path("part-6"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" });
    fileSplits[6] = new FileSplit(new Path("part-7"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" });
    fileSplits[7] = new FileSplit(new Path("part-8"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" });
    fileSplits[8] = new FileSplit(new Path("part-9"), 0, 0, new String[] { "10.0.0.4", "10.0.0.5", "10.0.0.1" });
    fileSplits[9] = new FileSplit(new Path("part-10"), 0, 0, new String[] { "10.0.0.2", "10.0.0.1", "10.0.0.2" });
    fileSplits[10] = new FileSplit(new Path("part-11"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" });
    fileSplits[11] = new FileSplit(new Path("part-12"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" });
    fileSplits[12] = new FileSplit(new Path("part-13"), 0, 0, new String[] { "10.0.0.2", "10.0.0.4", "10.0.0.5" });
    String[] expectedResults = new String[] { "nc1", "nc4", "nc4", "nc1", "nc3", "nc2", "nc2", "nc3", "nc5",
            "nc1", "nc5", "nc2", "nc4" };
    Scheduler scheduler = new Scheduler(ncNameToNcInfos);
    String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
    for (int i = 0; i < locationConstraints.length; i++) {
        Assert.assertEquals(locationConstraints[i], expectedResults[i]);
    }
    ClusterTopology topology = parseTopology();
    scheduler = new Scheduler(ncNameToNcInfos, topology);
    locationConstraints = scheduler.getLocationConstraints(fileSplits);
    for (int i = 0; i < locationConstraints.length; i++) {
        Assert.assertEquals(locationConstraints[i], expectedResults[i]);
    }
}
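With 13 splits over 6 NCs the assignment cannot be perfectly even: the expected results give three splits each to nc1, nc2, and nc4, two each to nc3 and nc5, and none to nc6, since no split has a replica on 10.0.0.6. Assuming the scheduler bounds per-NC load by the ceiling of splits over NCs (an assumption about its balancing rule, not something this test asserts), a hedged sanity check appended inside the method could read:

// Illustrative only (assumes a per-NC bound of ceil(splits / NCs)):
int ncCount = ncNameToNcInfos.size();
int upperBound = (fileSplits.length + ncCount - 1) / ncCount; // ceil(13 / 6) = 3
Map<String, Integer> splitsPerNc = new HashMap<>();
for (String nc : locationConstraints) {
    splitsPerNc.merge(nc, 1, Integer::sum);
}
for (int assigned : splitsPerNc.values()) {
    Assert.assertTrue("no NC should exceed the slot bound", assigned <= upperBound);
}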
Use of org.apache.hadoop.mapred.InputSplit in project asterixdb by apache.
The class HDFSReadOperatorDescriptor, method createPushRuntime:
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
        throws HyracksDataException {
    final InputSplit[] inputSplits = splitsFactory.getSplits();
    return new AbstractUnaryOutputSourceOperatorNodePushable() {

        private String nodeName = ctx.getJobletContext().getServiceContext().getNodeId();

        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
            try {
                writer.open();
                Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
                JobConf conf = confFactory.getConf();
                conf.setClassLoader(ctx.getJobletContext().getClassLoader());
                IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
                try {
                    parser.open(writer);
                    InputFormat inputFormat = conf.getInputFormat();
                    for (int i = 0; i < inputSplits.length; i++) {
                        /**
                         * read all the partitions scheduled to the current node
                         */
                        if (scheduledLocations[i].equals(nodeName)) {
                            /**
                             * pick an unread split to read;
                             * synchronize among simultaneous partitions in the same machine
                             */
                            synchronized (executed) {
                                if (!executed[i]) {
                                    executed[i] = true;
                                } else {
                                    continue;
                                }
                            }
                            /**
                             * read the split
                             */
                            RecordReader reader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);
                            Object key = reader.createKey();
                            Object value = reader.createValue();
                            while (reader.next(key, value)) {
                                parser.parse(key, value, writer, inputSplits[i].toString());
                            }
                        }
                    }
                } finally {
                    parser.close(writer);
                }
            } catch (Throwable th) {
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}
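The fields scheduledLocations, executed, splitsFactory, confFactory, and tupleParserFactory belong to HDFSReadOperatorDescriptor and are not shown in this excerpt. A sketch of how the operator is typically wired up, assuming the Scheduler from the tests above and org.apache.hyracks.api.constraints.PartitionConstraintHelper; jobSpec, readOperator, ncNameToNcInfos, and fileSplits are assumed to be set up elsewhere:

// Pin partition i of the reader to scheduledLocations[i], so that the
// scheduledLocations[i].equals(nodeName) check in initialize() selects
// exactly the splits assigned to each node.
Scheduler scheduler = new Scheduler(ncNameToNcInfos);
String[] scheduledLocations = scheduler.getLocationConstraints(fileSplits);
PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, readOperator, scheduledLocations);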
Use of org.apache.hadoop.mapred.InputSplit in project asterixdb by apache.
The class RackAwareNcCollectionBuilder, method build:
@Override
public INcCollection build(Map<String, NodeControllerInfo> ncNameToNcInfos,
        final Map<String, List<String>> ipToNcMapping, final Map<String, Integer> ncNameToIndex, String[] NCs,
        final int[] workloads, final int slotLimit) {
    try {
        final Map<List<Integer>, List<String>> pathToNCs = new HashMap<List<Integer>, List<String>>();
        for (int i = 0; i < NCs.length; i++) {
            List<Integer> path = new ArrayList<Integer>();
            String ipAddress = InetAddress
                    .getByAddress(ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress())
                    .getHostAddress();
            topology.lookupNetworkTerminal(ipAddress, path);
            if (path.size() <= 0) {
                // if the hyracks nc is not in the defined cluster
                path.add(Integer.MIN_VALUE);
                LOGGER.info(NCs[i] + "'s IP address is not in the cluster topology file!");
            }
            List<String> ncs = pathToNCs.get(path);
            if (ncs == null) {
                ncs = new ArrayList<String>();
                pathToNCs.put(path, ncs);
            }
            ncs.add(NCs[i]);
        }
        final TreeMap<List<Integer>, IntWritable> availableIpsToSlots =
                new TreeMap<List<Integer>, IntWritable>(new Comparator<List<Integer>>() {

                    @Override
                    public int compare(List<Integer> l1, List<Integer> l2) {
                        // lexicographic order over topology paths
                        int commonLength = Math.min(l1.size(), l2.size());
                        for (int i = 0; i < commonLength; i++) {
                            Integer value1 = l1.get(i);
                            Integer value2 = l2.get(i);
                            int cmp = value1 > value2 ? 1 : (value1 < value2 ? -1 : 0);
                            if (cmp != 0) {
                                return cmp;
                            }
                        }
                        return l1.size() > l2.size() ? 1 : (l1.size() < l2.size() ? -1 : 0);
                    }
                });
        for (int i = 0; i < workloads.length; i++) {
            if (workloads[i] < slotLimit) {
                List<Integer> path = new ArrayList<Integer>();
                String ipAddress = InetAddress
                        .getByAddress(ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress())
                        .getHostAddress();
                topology.lookupNetworkTerminal(ipAddress, path);
                if (path.size() <= 0) {
                    // if the hyracks nc is not in the defined cluster
                    path.add(Integer.MIN_VALUE);
                }
                IntWritable availableSlot = availableIpsToSlots.get(path);
                if (availableSlot == null) {
                    availableSlot = new IntWritable(slotLimit - workloads[i]);
                    availableIpsToSlots.put(path, availableSlot);
                } else {
                    availableSlot.set(slotLimit - workloads[i] + availableSlot.get());
                }
            }
        }
        return new INcCollection() {

            @Override
            public String findNearestAvailableSlot(InputSplit split) {
                try {
                    String[] locs = split.getLocations();
                    int minDistance = Integer.MAX_VALUE;
                    List<Integer> currentCandidatePath = null;
                    // guard against splits that report no locations
                    if (locs != null && locs.length > 0) {
                        for (int j = 0; j < locs.length; j++) {
                            /**
                             * get all the IP addresses from the name
                             */
                            InetAddress[] allIps = InetAddress.getAllByName(locs[j]);
                            boolean inTopology = false;
                            for (InetAddress ip : allIps) {
                                List<Integer> splitPath = new ArrayList<Integer>();
                                boolean inCluster = topology.lookupNetworkTerminal(ip.getHostAddress(), splitPath);
                                if (!inCluster) {
                                    continue;
                                }
                                inTopology = true;
                                /**
                                 * if the node controller exists
                                 */
                                List<Integer> candidatePath = availableIpsToSlots.floorKey(splitPath);
                                if (candidatePath == null) {
                                    candidatePath = availableIpsToSlots.ceilingKey(splitPath);
                                }
                                if (candidatePath != null) {
                                    if (availableIpsToSlots.get(candidatePath).get() > 0) {
                                        int distance = distance(splitPath, candidatePath);
                                        if (minDistance > distance) {
                                            minDistance = distance;
                                            currentCandidatePath = candidatePath;
                                        }
                                    }
                                }
                            }
                            if (!inTopology) {
                                LOGGER.info(locs[j] + "'s IP address is not in the cluster topology file!");
                                /**
                                 * if the machine is not in the topology file
                                 */
                                List<Integer> candidatePath = null;
                                for (Entry<List<Integer>, IntWritable> entry : availableIpsToSlots.entrySet()) {
                                    if (entry.getValue().get() > 0) {
                                        candidatePath = entry.getKey();
                                        break;
                                    }
                                }
                                /** the split path is empty */
                                if (candidatePath != null) {
                                    if (availableIpsToSlots.get(candidatePath).get() > 0) {
                                        currentCandidatePath = candidatePath;
                                    }
                                }
                            }
                        }
                    } else {
                        for (Entry<List<Integer>, IntWritable> entry : availableIpsToSlots.entrySet()) {
                            if (entry.getValue().get() > 0) {
                                currentCandidatePath = entry.getKey();
                                break;
                            }
                        }
                    }
                    if (currentCandidatePath != null && currentCandidatePath.size() > 0) {
                        /**
                         * Update the entry of the selected IP
                         */
                        IntWritable availableSlot = availableIpsToSlots.get(currentCandidatePath);
                        availableSlot.set(availableSlot.get() - 1);
                        if (availableSlot.get() == 0) {
                            availableIpsToSlots.remove(currentCandidatePath);
                        }
                        /**
                         * Update the entry of the selected NC
                         */
                        List<String> candidateNcs = pathToNCs.get(currentCandidatePath);
                        for (String candidate : candidateNcs) {
                            int ncIndex = ncNameToIndex.get(candidate);
                            if (workloads[ncIndex] < slotLimit) {
                                return candidate;
                            }
                        }
                    }
                    /** not scheduled */
                    return null;
                } catch (Exception e) {
                    throw new IllegalStateException(e);
                }
            }

            @Override
            public int numAvailableSlots() {
                return availableIpsToSlots.size();
            }

            private int distance(List<Integer> splitPath, List<Integer> candidatePath) {
                int commonLength = Math.min(splitPath.size(), candidatePath.size());
                int distance = 0;
                for (int i = 0; i < commonLength; i++) {
                    distance = distance * 100 + Math.abs(splitPath.get(i) - candidatePath.get(i));
                }
                List<Integer> restElements = splitPath.size() > candidatePath.size() ? splitPath : candidatePath;
                for (int i = commonLength; i < restElements.size(); i++) {
                    distance = distance * 100 + Math.abs(restElements.get(i));
                }
                return distance;
            }
        };
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }
}
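The distance metric treats a topology path as a list of switch indices from the root down and weights each level by a factor of 100, so divergence near the root dominates any leaf-level gap. A standalone copy of the metric with two worked cases makes the weighting concrete:

import java.util.Arrays;
import java.util.List;

final class TopologyDistance {

    // Same metric as the anonymous class above, lifted out for illustration.
    static int distance(List<Integer> splitPath, List<Integer> candidatePath) {
        int commonLength = Math.min(splitPath.size(), candidatePath.size());
        int distance = 0;
        for (int i = 0; i < commonLength; i++) {
            distance = distance * 100 + Math.abs(splitPath.get(i) - candidatePath.get(i));
        }
        List<Integer> restElements = splitPath.size() > candidatePath.size() ? splitPath : candidatePath;
        for (int i = commonLength; i < restElements.size(); i++) {
            distance = distance * 100 + Math.abs(restElements.get(i));
        }
        return distance;
    }

    public static void main(String[] args) {
        // same switch, two ports apart: (0 * 100 + |0 - 0|) * 100 + |1 - 3| = 2
        System.out.println(distance(Arrays.asList(0, 1), Arrays.asList(0, 3))); // 2
        // paths diverge one level higher: (0 * 100 + |0 - 1|) * 100 + |1 - 1| = 100
        System.out.println(distance(Arrays.asList(0, 1), Arrays.asList(1, 1))); // 100
    }
}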