Use of org.goldenorb.io.input.RawSplit in project goldenorb by jzachr.
From class TestInputSplitAllocatorDFS, method testInputSplitAllocator.
/**
 * Copies the test data file into the test file system, allocates its input splits across six
 * partition members that share the local host name (ports 0 through 5), and checks that no member
 * is handed more than five splits and that the split lengths add up to the local file's size.
 */
@Test
public void testInputSplitAllocator() throws Exception {
  LOG = LoggerFactory.getLogger(TestInputSplitAllocatorDFS.class);
  fs.copyFromLocalFile(new Path("src/test/resources/InputSplitAllocatorDFSTestData.txt"), new Path("test/inpath"));

  // Point the configuration at the test cluster's name node.
  OrbConfiguration orbConf = new OrbConfiguration(true);
  orbConf.set("fs.default.name", "hdfs://localhost:" + cluster.getNameNodePort());
  orbConf.setJobNumber("0");
  orbConf.setFileInputPath("test/inpath");
  orbConf.setNameNode("hdfs://localhost:" + cluster.getNameNodePort());

  // Drop a trailing dot from the resolved host name, if there is one.
  String resolvedHost = OrbDNS.getDefaultHost(orbConf);
  String hostname = resolvedHost.endsWith(".")
      ? resolvedHost.substring(0, resolvedHost.length() - 1)
      : resolvedHost;

  // Six members on the same host, distinguished only by port.
  List<OrbPartitionMember> orbPartitionMembers = new ArrayList<OrbPartitionMember>();
  for (int port = 0; port < 6; port++) {
    OrbPartitionMember member = new OrbPartitionMember();
    member.setHostname(hostname);
    member.setPort(port);
    orbPartitionMembers.add(member);
  }

  InputSplitAllocator isa = new InputSplitAllocator(orbConf, orbPartitionMembers);
  Map<OrbPartitionMember, List<RawSplit>> inputSplitAssignments = isa.assignInputSplits();

  long totalFileSize = 0;
  for (Map.Entry<OrbPartitionMember, List<RawSplit>> assignment : inputSplitAssignments.entrySet()) {
    OrbPartitionMember member = assignment.getKey();
    List<RawSplit> memberSplits = assignment.getValue();

    long memberBytes = 0;
    for (RawSplit split : memberSplits) {
      memberBytes += split.getDataLength();
    }
    totalFileSize += memberBytes;

    LOG.info(member.getHostname() + ":" + member.getPort() + " | RawSplits count: " + memberSplits.size() + " | RawSplits size: " + memberBytes);
    assertTrue(memberSplits.size() <= 5);
  }

  // The splits together must cover exactly the bytes of the source file.
  File testFile = new File("src/test/resources/InputSplitAllocatorDFSTestData.txt");
  assertTrue(totalFileSize == testFile.length());
}
Use of org.goldenorb.io.input.RawSplit in project goldenorb by jzachr.
From class OrbPartition, method loadVerticesLeader.
/**
 * Leader-side vertex loading. Computes the input split assignments for the leader group members,
 * asks each member to load the splits assigned to it, then waits until both
 * {@code inputSplitLoaderHandlers} and {@code loadVerticesHandlers} are empty before calling
 * {@code process()}.
 * <p>
 * The phases are separated by the barriers "startLoadVerticesBarrier", "sentInputSplitsBarrier",
 * "inputSplitHandlersCompleteBarrier" and "loadVerticesIntoPartitionBarrier", entered in that
 * order. If {@code standby} is set, {@code waitForActivate()} is called before {@code process()}.
 */
private void loadVerticesLeader() {
enterBarrier("startLoadVerticesBarrier");
// Allocation and dispatch run while holding the leaderGroupMembers monitor.
synchronized (leaderGroupMembers) {
// Here InputSplits are sent to their constituent partitions for loading
InputSplitAllocator inputSplitAllocator = new InputSplitAllocator(getOrbConf(), leaderGroupMembers);
Map<OrbPartitionMember, List<RawSplit>> inputSplitAssignments = inputSplitAllocator.assignInputSplits();
for (OrbPartitionMember orbPartitionMember : inputSplitAssignments.keySet()) {
for (RawSplit rawSplit : inputSplitAssignments.get(orbPartitionMember)) {
orbPartitionMember.loadVerticesFromInputSplit(rawSplit);
}
}
}
enterBarrier("sentInputSplitsBarrier");
// just like the slave we have to wait for the InputSplitHandlers to finish loading and sending vertices
// Re-checks the handler collection after each wait; the wait times out after 1000 ms.
while (!inputSplitLoaderHandlers.isEmpty()) {
synchronized (this) {
try {
wait(1000);
} catch (InterruptedException e) {
// NOTE(review): the interrupt is only printed; the interrupt status is not restored and the
// loop keeps waiting until the collection is empty.
e.printStackTrace();
}
}
}
enterBarrier("inputSplitHandlersCompleteBarrier");
// Wait until loadVerticesHandlers is empty. (The original comment here was truncated to
// "them into vertices"; presumably these handlers turn the received data into vertices - TODO confirm.)
while (!loadVerticesHandlers.isEmpty()) {
synchronized (this) {
try {
wait(1000);
} catch (InterruptedException e) {
// NOTE(review): same as above - the interrupt is printed and otherwise ignored.
e.printStackTrace();
}
}
}
enterBarrier("loadVerticesIntoPartitionBarrier");
LOG.debug("Completed Loading vertices!!!");
// A standby partition blocks in waitForActivate() before it starts processing.
if (standby) {
waitForActivate();
}
process();
// try {
// ZookeeperUtils.tryToCreateNode(zk, jobInProgressPath + "/messages/complete");
// } catch (OrbZKFailure e) {
// e.printStackTrace();
// }
// System.exit(1);
}
Use of org.goldenorb.io.input.RawSplit in project goldenorb by jzachr.
From class InputSplitAllocator, method assignInputSplits(Collection).
/**
 * Assigns the given raw splits to partition members.
 * <p>
 * A split that has at least one viable host (as reported by {@code getViableHosts}) is placed on
 * the lightest of those hosts. Splits with no viable host are set aside and placed afterwards on
 * the lightest host overall. Within the chosen host, the split goes to the lightest port.
 *
 * @param rawSplits
 *          a Collection of RawSplit objects
 * @return a map from each partition member to the list of raw splits held for its host and port
 */
public Map<OrbPartitionMember, List<RawSplit>> assignInputSplits(Collection<RawSplit> rawSplits) {
  List<RawSplit> notLocalRawSplits = new ArrayList<RawSplit>();

  // First pass: place every split that can be served by one of its own locations.
  for (RawSplit rawSplit : rawSplits) {
    List<String> viableLocations = getViableHosts(rawSplit.getLocations());
    if (viableLocations.isEmpty()) {
      notLocalRawSplits.add(rawSplit);
      LOG.debug("Non-local split found: " + rawSplit);
    } else {
      assignSplitToHost(getLightestHost(viableLocations), rawSplit);
    }
  }

  // Second pass: non-local splits are placed only after all local ones, so they fill in
  // whichever hosts are lightest at that point.
  for (RawSplit rawSplit : notLocalRawSplits) {
    assignSplitToHost(getLightestHostAll(), rawSplit);
  }

  Map<OrbPartitionMember, List<RawSplit>> mapOfSplitsToPartitions = new HashMap<OrbPartitionMember, List<RawSplit>>();
  for (OrbPartitionMember orbPartitionMember : orbPartitionMembers) {
    mapOfSplitsToPartitions.put(orbPartitionMember,
      hostToPortToRawSplits.get(orbPartitionMember.getHostname()).get(orbPartitionMember.getPort()));
  }
  return mapOfSplitsToPartitions;
}

/**
 * Adds the split to the lightest port of the given host and increments that host's split count.
 * The list is mutated in place, so it does not need to be written back into the map.
 */
private void assignSplitToHost(String host, RawSplit rawSplit) {
  int port = getLightestPort(host);
  hostToPortToRawSplits.get(host).get(port).add(rawSplit);
  hostToRawSplitCount.put(host, hostToRawSplitCount.get(host) + 1);
}
Use of org.goldenorb.io.input.RawSplit in project goldenorb by jzachr.
From class InputSplitAllocator, method assignInputSplits().
/**
 * Obtains the input splits from the job's configured input format, serializes each one into a
 * {@link RawSplit} (class name, data length, serialized bytes and locations), and delegates to
 * {@link #assignInputSplits(Collection)} to distribute them.
 * <p>
 * If the input format yields no splits, the delegate is called with an empty list.
 *
 * @return the split assignment produced by {@link #assignInputSplits(Collection)}
 * @throws RuntimeException
 *           wrapping the ClassNotFoundException, IOException or InterruptedException raised while
 *           resolving the input format or computing and serializing the splits
 */
@SuppressWarnings({ "deprecation", "rawtypes", "unchecked" })
public Map<OrbPartitionMember, List<RawSplit>> assignInputSplits() {
  List<RawSplit> rawSplits = null;
  JobConf job = new JobConf(orbConf);
  LOG.debug(orbConf.getJobNumber().toString());
  JobContext jobContext = new JobContext(job, new JobID(orbConf.getJobNumber(), 0));
  try {
    org.apache.hadoop.mapreduce.InputFormat<?, ?> input =
        ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);
    List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(jobContext);
    rawSplits = new ArrayList<RawSplit>(splits.size());

    // The serializer is looked up from the first split's class, so an empty split list must
    // bypass serialization entirely.
    if (!splits.isEmpty()) {
      DataOutputBuffer buffer = new DataOutputBuffer();
      SerializationFactory factory = new SerializationFactory(orbConf);
      Serializer serializer = factory.getSerializer(splits.get(0).getClass());
      serializer.open(buffer);
      try {
        for (org.apache.hadoop.mapreduce.InputSplit split : splits) {
          // The buffer is reused; reset it so it holds only the current split's bytes.
          buffer.reset();
          serializer.serialize(split);
          RawSplit rawSplit = new RawSplit();
          rawSplit.setClassName(split.getClass().getName());
          rawSplit.setDataLength(split.getLength());
          rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
          rawSplit.setLocations(split.getLocations());
          rawSplits.add(rawSplit);
        }
      } finally {
        serializer.close();
      }
    }
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e);
  } catch (IOException e) {
    throw new RuntimeException(e);
  } catch (InterruptedException e) {
    // Keep the interrupt visible to callers even though it is rethrown unchecked.
    Thread.currentThread().interrupt();
    throw new RuntimeException(e);
  }
  return assignInputSplits(rawSplits);
}
Use of org.goldenorb.io.input.RawSplit in project goldenorb by jzachr.
From class TestInputSplitAllocator, method inputSplitAllocatorTest.
/**
 * Allocates four raw splits (with locations {A,B}, {B,C}, {E,F} and {C}) across six partition
 * members - hosts A, B and C with ports 0 and 1 each - and checks that no member ends up with
 * more than one split.
 */
@Test
public void inputSplitAllocatorTest() {
  LOG = LoggerFactory.getLogger(TestInputSplitAllocator.class);

  // Splits under test; "rs3" lists only hosts that have no partition member.
  List<RawSplit> rawSplits = new ArrayList<RawSplit>();
  rawSplits.add(new RawSplitWithID("rs1", new String[] { "A", "B" }));
  rawSplits.add(new RawSplitWithID("rs2", new String[] { "B", "C" }));
  rawSplits.add(new RawSplitWithID("rs3", new String[] { "E", "F" }));
  rawSplits.add(new RawSplitWithID("rs4", new String[] { "C" }));

  // Two members (ports 0 and 1) on each of the three hosts, in host-then-port order.
  String[] hosts = { "A", "B", "C" };
  List<OrbPartitionMember> orbPartitionMembers = new ArrayList<OrbPartitionMember>();
  for (String host : hosts) {
    for (int port = 0; port < 2; port++) {
      OrbPartitionMember member = new OrbPartitionMember();
      member.setHostname(host);
      member.setPort(port);
      orbPartitionMembers.add(member);
    }
  }

  OrbConfiguration orbConf = new OrbConfiguration();
  InputSplitAllocator isa = new InputSplitAllocator(orbConf, orbPartitionMembers);
  Map<OrbPartitionMember, List<RawSplit>> assignedSplits = isa.assignInputSplits(rawSplits);

  for (Map.Entry<OrbPartitionMember, List<RawSplit>> assignment : assignedSplits.entrySet()) {
    OrbPartitionMember member = assignment.getKey();
    List<RawSplit> memberSplits = assignment.getValue();
    LOG.info(member.getHostname() + ":" + member.getPort() + " | " + memberSplits);
    assertTrue(memberSplits.size() < 2);
  }
}
Aggregations