Search in sources :

Example 1 with RawSplit

use of org.goldenorb.io.input.RawSplit in project goldenorb by jzachr.

the class TestInputSplitAllocatorDFS method testInputSplitAllocator.

/**
 * Copies the local test data file into {@code fs}, allocates its input splits across six
 * partition members that share this machine's hostname (ports 0 through 5), and verifies that
 * no member receives more than five splits and that the split lengths add up to the size of
 * the local file.
 */
@Test
public void testInputSplitAllocator() throws Exception {
    LOG = LoggerFactory.getLogger(TestInputSplitAllocatorDFS.class);
    String localDataPath = "src/test/resources/InputSplitAllocatorDFSTestData.txt";
    String dfsInputPath = "test/inpath";
    fs.copyFromLocalFile(new Path(localDataPath), new Path(dfsInputPath));

    // Point the configuration at the name node of the test cluster.
    OrbConfiguration orbConf = new OrbConfiguration(true);
    orbConf.set("fs.default.name", "hdfs://localhost:" + cluster.getNameNodePort());
    orbConf.setJobNumber("0");
    orbConf.setFileInputPath(dfsInputPath);
    orbConf.setNameNode("hdfs://localhost:" + cluster.getNameNodePort());

    // Drop a trailing dot from the resolved host name, if present.
    String hostname = OrbDNS.getDefaultHost(orbConf);
    if (hostname.endsWith(".")) {
        hostname = hostname.substring(0, hostname.length() - 1);
    }

    // Six members on the same host, distinguished only by port.
    List<OrbPartitionMember> orbPartitionMembers = new ArrayList<OrbPartitionMember>();
    for (int port = 0; port < 6; port++) {
        OrbPartitionMember member = new OrbPartitionMember();
        member.setHostname(hostname);
        member.setPort(port);
        orbPartitionMembers.add(member);
    }

    InputSplitAllocator isa = new InputSplitAllocator(orbConf, orbPartitionMembers);
    Map<OrbPartitionMember, List<RawSplit>> inputSplitAssignments = isa.assignInputSplits();

    long totalFileSize = 0;
    for (Map.Entry<OrbPartitionMember, List<RawSplit>> assignment : inputSplitAssignments.entrySet()) {
        OrbPartitionMember member = assignment.getKey();
        List<RawSplit> memberSplits = assignment.getValue();
        long rawSplitSize = 0;
        for (RawSplit split : memberSplits) {
            rawSplitSize += split.getDataLength();
        }
        totalFileSize += rawSplitSize;
        LOG.info(member.getHostname() + ":" + member.getPort() + " | RawSplits count: " + memberSplits.size() + " | RawSplits size: " + rawSplitSize);
        assertTrue(memberSplits.size() <= 5);
    }

    // The assigned splits together must cover exactly the bytes of the source file.
    File testFile = new File(localDataPath);
    assertTrue(totalFileSize == testFile.length());
}
Also used : Path(org.apache.hadoop.fs.Path) RawSplit(org.goldenorb.io.input.RawSplit) InputSplitAllocator(org.goldenorb.io.InputSplitAllocator) OrbConfiguration(org.goldenorb.conf.OrbConfiguration) ArrayList(java.util.ArrayList) OrbPartitionMember(org.goldenorb.jet.OrbPartitionMember) ArrayList(java.util.ArrayList) List(java.util.List) File(java.io.File) Test(org.junit.Test)

Example 2 with RawSplit

use of org.goldenorb.io.input.RawSplit in project goldenorb by jzachr.

the class OrbPartition method loadVerticesLeader.

/**
 * Leader-side vertex loading. Allocates the input splits across the leader group members,
 * hands each split to its assigned member, then polls until the input split handlers and the
 * load-vertices handlers have drained, entering a barrier after each phase. Finally waits for
 * activation when this partition is a standby and calls {@code process()}.
 */
private void loadVerticesLeader() {
    enterBarrier("startLoadVerticesBarrier");
    synchronized (leaderGroupMembers) {
        // Send every InputSplit to the partition member it was assigned to for loading.
        InputSplitAllocator allocator = new InputSplitAllocator(getOrbConf(), leaderGroupMembers);
        Map<OrbPartitionMember, List<RawSplit>> assignments = allocator.assignInputSplits();
        for (Map.Entry<OrbPartitionMember, List<RawSplit>> assignment : assignments.entrySet()) {
            OrbPartitionMember member = assignment.getKey();
            for (RawSplit split : assignment.getValue()) {
                member.loadVerticesFromInputSplit(split);
            }
        }
    }
    enterBarrier("sentInputSplitsBarrier");

    // Like the slave, the leader has to wait for the InputSplitHandlers to finish loading and
    // sending vertices. Re-check once a second while holding this object's monitor.
    // NOTE(review): an interrupt is only printed and the polling continues; the thread's
    // interrupt status is not restored - confirm this is intended.
    while (!inputSplitLoaderHandlers.isEmpty()) {
        synchronized (this) {
            try {
                wait(1000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
    enterBarrier("inputSplitHandlersCompleteBarrier");

    // Same polling scheme, this time until loadVerticesHandlers is empty.
    while (!loadVerticesHandlers.isEmpty()) {
        synchronized (this) {
            try {
                wait(1000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
    enterBarrier("loadVerticesIntoPartitionBarrier");
    LOG.debug("Completed Loading vertices!!!");

    if (standby) {
        waitForActivate();
    }
    process();
// Disabled code kept from the original source:
// try {
// ZookeeperUtils.tryToCreateNode(zk, jobInProgressPath + "/messages/complete");
// } catch (OrbZKFailure e) {
// e.printStackTrace();
// }
// System.exit(1);
}
Also used : RawSplit(org.goldenorb.io.input.RawSplit) InputSplitAllocator(org.goldenorb.io.InputSplitAllocator) OrbPartitionMember(org.goldenorb.jet.OrbPartitionMember) List(java.util.List) ArrayList(java.util.ArrayList)

Example 3 with RawSplit

use of org.goldenorb.io.input.RawSplit in project goldenorb by jzachr.

the class InputSplitAllocator method assignInputSplits.

/**
 * Assigns raw splits to partition members.
 *
 * Splits for which {@code getViableHosts} returns at least one host are placed first, each on
 * the host chosen by {@code getLightestHost} from those viable hosts. Splits with no viable
 * host are placed afterwards on the host chosen by {@code getLightestHostAll}. In both cases
 * the port on the chosen host is selected with {@code getLightestPort}.
 *
 * @param rawSplits
 *          - a Collection of RawSplit objects
 * @return a map from every member of {@code orbPartitionMembers} to the list of splits
 *         recorded for that member's hostname and port
 */
public Map<OrbPartitionMember, List<RawSplit>> assignInputSplits(Collection<RawSplit> rawSplits) {
    List<RawSplit> notLocalRawSplits = new ArrayList<RawSplit>();
    for (RawSplit rawSplit : rawSplits) {
        List<String> viableLocations = getViableHosts(rawSplit.getLocations());
        if (viableLocations.isEmpty()) {
            // Defer splits without a viable host until all local ones are placed.
            notLocalRawSplits.add(rawSplit);
            LOG.debug("Non-local split found: " + rawSplit);
        } else {
            addSplitToLightestPort(getLightestHost(viableLocations), rawSplit);
        }
    }
    for (RawSplit rawSplit : notLocalRawSplits) {
        addSplitToLightestPort(getLightestHostAll(), rawSplit);
    }

    Map<OrbPartitionMember, List<RawSplit>> mapOfSplitsToPartitions = new HashMap<OrbPartitionMember, List<RawSplit>>();
    for (OrbPartitionMember orbPartitionMember : orbPartitionMembers) {
        mapOfSplitsToPartitions.put(orbPartitionMember, hostToPortToRawSplits.get(orbPartitionMember.getHostname()).get(orbPartitionMember.getPort()));
    }
    return mapOfSplitsToPartitions;
}

/**
 * Appends a split to the list of the port chosen by {@code getLightestPort} on the given host
 * and increments that host's split count.
 *
 * The list is modified in place, so it does not need to be put back into the map.
 *
 * @param host
 *          - the host that receives the split
 * @param rawSplit
 *          - the split to record
 */
private void addSplitToLightestPort(String host, RawSplit rawSplit) {
    int port = getLightestPort(host);
    hostToPortToRawSplits.get(host).get(port).add(rawSplit);
    hostToRawSplitCount.put(host, hostToRawSplitCount.get(host) + 1);
}
Also used : RawSplit(org.goldenorb.io.input.RawSplit) OrbPartitionMember(org.goldenorb.jet.OrbPartitionMember) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List)

Example 4 with RawSplit

use of org.goldenorb.io.input.RawSplit in project goldenorb by jzachr.

the class InputSplitAllocator method assignInputSplits.

/**
 * Gets the raw splits for the configured job and calls
 * {@code assignInputSplits(Collection)} to assign them.
 *
 * The input format class is taken from a JobContext built from {@code orbConf}. Each split it
 * returns is serialized into a RawSplit that also carries the split's class name, length and
 * locations. When the input format returns no splits, an empty list is passed on instead of
 * failing while looking up a serializer for the first split.
 *
 * @return the assignment produced by {@code assignInputSplits(Collection)}
 * @throws RuntimeException
 *           wrapping any ClassNotFoundException, IOException or InterruptedException raised
 *           while the splits are computed or serialized
 */
@SuppressWarnings({ "deprecation", "rawtypes", "unchecked" })
public Map<OrbPartitionMember, List<RawSplit>> assignInputSplits() {
    List<RawSplit> rawSplits = null;
    JobConf job = new JobConf(orbConf);
    LOG.debug(orbConf.getJobNumber().toString());
    JobContext jobContext = new JobContext(job, new JobID(orbConf.getJobNumber(), 0));
    try {
        org.apache.hadoop.mapreduce.InputFormat<?, ?> input = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);
        List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(jobContext);
        rawSplits = new ArrayList<RawSplit>(splits.size());
        if (!splits.isEmpty()) {
            DataOutputBuffer buffer = new DataOutputBuffer();
            SerializationFactory factory = new SerializationFactory(orbConf);
            // As before, one serializer (looked up from the first split's class) serves all splits.
            Serializer serializer = factory.getSerializer(splits.get(0).getClass());
            serializer.open(buffer);
            for (org.apache.hadoop.mapreduce.InputSplit split : splits) {
                // Reuse the buffer: reset it so it holds only the current split's bytes.
                buffer.reset();
                serializer.serialize(split);
                RawSplit rawSplit = new RawSplit();
                rawSplit.setClassName(split.getClass().getName());
                rawSplit.setDataLength(split.getLength());
                rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
                rawSplit.setLocations(split.getLocations());
                rawSplits.add(rawSplit);
            }
        }
    } catch (ClassNotFoundException e) {
        LOG.error("Input format class could not be found", e);
        throw new RuntimeException(e);
    } catch (IOException e) {
        LOG.error("I/O failure while computing or serializing input splits", e);
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        // Restore the interrupt status so callers further up can still observe it.
        Thread.currentThread().interrupt();
        LOG.error("Interrupted while computing input splits", e);
        throw new RuntimeException(e);
    }
    return assignInputSplits(rawSplits);
}
Also used : RawSplit(org.goldenorb.io.input.RawSplit) SerializationFactory(org.apache.hadoop.io.serializer.SerializationFactory) IOException(java.io.IOException) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) JobContext(org.apache.hadoop.mapreduce.JobContext) JobConf(org.apache.hadoop.mapred.JobConf) JobID(org.apache.hadoop.mapreduce.JobID) Serializer(org.apache.hadoop.io.serializer.Serializer)

Example 5 with RawSplit

use of org.goldenorb.io.input.RawSplit in project goldenorb by jzachr.

the class TestInputSplitAllocator method inputSplitAllocatorTest.

/**
 * Allocates four raw splits with fixed location lists across six partition members (hosts
 * "A", "B" and "C", ports 0 and 1 on each) and verifies that every member ends up with fewer
 * than two splits. The split "rs3" lists only hosts "E" and "F", which are not among the
 * members' hosts.
 */
@Test
public void inputSplitAllocatorTest() {
    LOG = LoggerFactory.getLogger(TestInputSplitAllocator.class);

    // Splits under test: an id plus the hosts the split lists as its locations.
    List<RawSplit> rawSplits = new ArrayList<RawSplit>();
    rawSplits.add(new RawSplitWithID("rs1", new String[] { "A", "B" }));
    rawSplits.add(new RawSplitWithID("rs2", new String[] { "B", "C" }));
    rawSplits.add(new RawSplitWithID("rs3", new String[] { "E", "F" }));
    rawSplits.add(new RawSplitWithID("rs4", new String[] { "C" }));

    // Two members per host, in the order A:0, A:1, B:0, B:1, C:0, C:1.
    List<OrbPartitionMember> orbPartitionMembers = new ArrayList<OrbPartitionMember>();
    String[] hosts = { "A", "B", "C" };
    for (String host : hosts) {
        for (int port = 0; port < 2; port++) {
            OrbPartitionMember member = new OrbPartitionMember();
            member.setHostname(host);
            member.setPort(port);
            orbPartitionMembers.add(member);
        }
    }

    OrbConfiguration orbConf = new OrbConfiguration();
    InputSplitAllocator isa = new InputSplitAllocator(orbConf, orbPartitionMembers);
    Map<OrbPartitionMember, List<RawSplit>> assignedSplits = isa.assignInputSplits(rawSplits);
    for (Map.Entry<OrbPartitionMember, List<RawSplit>> assignment : assignedSplits.entrySet()) {
        OrbPartitionMember member = assignment.getKey();
        List<RawSplit> memberSplits = assignment.getValue();
        LOG.info(member.getHostname() + ":" + member.getPort() + " | " + memberSplits);
        assertTrue(memberSplits.size() < 2);
    }
}
Also used : RawSplit(org.goldenorb.io.input.RawSplit) InputSplitAllocator(org.goldenorb.io.InputSplitAllocator) OrbConfiguration(org.goldenorb.conf.OrbConfiguration) ArrayList(java.util.ArrayList) OrbPartitionMember(org.goldenorb.jet.OrbPartitionMember) List(java.util.List) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Aggregations

RawSplit (org.goldenorb.io.input.RawSplit): 5, ArrayList (java.util.ArrayList): 4, List (java.util.List): 4, OrbPartitionMember (org.goldenorb.jet.OrbPartitionMember): 4, InputSplitAllocator (org.goldenorb.io.InputSplitAllocator): 3, OrbConfiguration (org.goldenorb.conf.OrbConfiguration): 2, Test (org.junit.Test): 2, File (java.io.File): 1, IOException (java.io.IOException): 1, HashMap (java.util.HashMap): 1, Path (org.apache.hadoop.fs.Path): 1, DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer): 1, SerializationFactory (org.apache.hadoop.io.serializer.SerializationFactory): 1, Serializer (org.apache.hadoop.io.serializer.Serializer): 1, JobConf (org.apache.hadoop.mapred.JobConf): 1, JobContext (org.apache.hadoop.mapreduce.JobContext): 1, JobID (org.apache.hadoop.mapreduce.JobID): 1