Search in sources :

Example 6 with Path

use of edu.iu.dsc.tws.data.fs.Path in project twister2 by DSC-SPIDAL.

the class BinaryInputFormatter method createInputSplits.

/**
 * Computes record-aligned input splits for the configured binary file.
 *
 * <p>The total number of records is divided evenly among {@code minNumSplits} splits. Any
 * remainder is handed out one record at a time to the first splits, so split sizes differ by at
 * most one record. Each split is tagged with the hosts of the block returned by
 * {@code getBlockIndexForPosition} for its start position.
 *
 * <p>Limitation: split sizes are derived from the combined length of all files, but splits never
 * span files. When a directory with several files is given, trailing bytes of a file that do not
 * fill a whole split are not assigned to any split (see the TODO below).
 *
 * @param minNumSplits The minimum desired number of file splits.
 * @return The computed file splits.
 * @throws IOException if the file system or the file status cannot be accessed
 * @throws IllegalArgumentException if {@code minNumSplits} is smaller than 1
 * @throws IllegalStateException if the total length is not a multiple of the record length, or
 *                               if one of the files is empty
 */
@Override
public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    if (minNumSplits < 1) {
        throw new IllegalArgumentException("Number of input splits has to be at least 1.");
    }
    // TODO L2: The current implementation only handles a single binary file, not a set of files
    // take the desired number of splits into account
    // NOTE(review): this combined value only pre-sizes the result list; the split arithmetic
    // below uses minNumSplits. Confirm whether this.numSplits should influence the split count.
    int curminNumSplits = Math.max(minNumSplits, this.numSplits);
    final Path path = this.filePath;
    final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(curminNumSplits);
    // get all the files that are involved in the splits
    List<FileStatus> files = new ArrayList<FileStatus>();
    long totalLength = 0;
    final FileSystem fs = path.getFileSystem();
    final FileStatus pathFile = fs.getFileStatus(path);
    if (pathFile.isDir()) {
        totalLength += sumFilesInDir(path, files, true);
    } else {
        // TODO L3: implement test for unsplittable
        // testForUnsplittable(pathFile);
        files.add(pathFile);
        totalLength += pathFile.getLen();
    }
    // Only whole records can be assigned to splits
    if (totalLength % this.recordLength != 0) {
        throw new IllegalStateException("The Binary file has a incomplete record");
    }
    long numberOfRecords = totalLength / this.recordLength;
    long minRecordsForSplit = Math.floorDiv(numberOfRecords, minNumSplits);
    // Odd records will be divided among the first splits so the max diff would be 1 record
    long oddRecords = numberOfRecords % minNumSplits;
    final long minSplitSize = minRecordsForSplit * this.recordLength;
    final long halfSplit = minSplitSize >>> 1;
    // Generate the splits
    int splitNum = 0;
    for (final FileStatus file : files) {
        final long len = file.getLen();
        if (len <= 0) {
            throw new IllegalStateException("The binary file " + file.getPath() + " is Empty");
        }
        // get the block locations and make sure they are in order with respect to their offset
        final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, len);
        Arrays.sort(blocks);
        long bytesUnassigned = len;
        long position = 0;
        int blockIndex = 0;
        // The size is chosen per split: while odd records remain, the split carries one extra
        // record. Deciding this once per file would enlarge every split of the file and leave
        // the tail of the file unassigned.
        long currentSplitSize = oddRecords > 0 ? minSplitSize + this.recordLength : minSplitSize;
        // A zero split size would never consume any bytes, so stop instead of looping forever
        while (currentSplitSize > 0 && bytesUnassigned >= currentSplitSize) {
            // get the block containing the majority of the data
            blockIndex = getBlockIndexForPosition(blocks, position, halfSplit, blockIndex);
            // create a new split
            FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position,
                    currentSplitSize, blocks[blockIndex].getHosts());
            inputSplits.add(fis);
            // adjust the positions
            position += currentSplitSize;
            bytesUnassigned -= currentSplitSize;
            // this split consumed one of the odd records, if any were left
            if (oddRecords > 0) {
                oddRecords--;
            }
            currentSplitSize = oddRecords > 0 ? minSplitSize + this.recordLength : minSplitSize;
        }
    }
    return inputSplits.toArray(new FileInputSplit[inputSplits.size()]);
}
Also used : Path(edu.iu.dsc.tws.data.fs.Path) FileStatus(edu.iu.dsc.tws.data.fs.FileStatus) ArrayList(java.util.ArrayList) BlockLocation(edu.iu.dsc.tws.data.fs.BlockLocation) FileInputSplit(edu.iu.dsc.tws.data.fs.FileInputSplit) FileSystem(edu.iu.dsc.tws.data.fs.FileSystem)

Example 7 with Path

use of edu.iu.dsc.tws.data.fs.Path in project twister2 by DSC-SPIDAL.

the class SimpleTaskQueueWithMM method init.

/**
 * Initialize the container.
 *
 * <p>Creates the task executor, builds the task plan and network from the configuration, sets up
 * a direct dataflow operation from task 0 to task 1 and then exercises an LMDB backed memory
 * manager: container 0 stores a buffer under the key "temp", container 1 reads it back,
 * overwrites it and appends to it, printing what it observes.
 *
 * @param cfg the configuration used to create the task plan and the network
 * @param containerId the id of this container; 0 takes the writing role, 1 the reading role,
 *                    any other id only performs the common setup
 * @param plan the resource plan used to create the task plan
 */
public void init(Config cfg, int containerId, ResourcePlan plan) {
    LOG.log(Level.INFO, "Starting the example with container id: " + plan.getThisId());
    // Creates a task and a task executor instance to be used in this container
    taskExecutor = new TaskExecutorFixedThread();
    this.status = Status.INIT;
    // lets create the task plan
    TaskPlan taskPlan = Utils.createTaskPlan(cfg, plan);
    // first get the communication config file
    TWSNetwork network = new TWSNetwork(cfg, taskPlan);
    TWSCommunication channel = network.getDataFlowTWSCommunication();
    // we are sending messages from 0th task to 1st task
    Set<Integer> sources = new HashSet<>();
    sources.add(0);
    int dests = 1;
    Map<String, Object> newCfg = new HashMap<>();
    LOG.info("Setting up reduce dataflow operation");
    Path dataPath = new Path("/home/pulasthi/work/twister2/lmdbdatabase");
    MemoryManager memoryManager = new LMDBMemoryManager(dataPath);
    // this method calls the init method
    // I think this is wrong
    // TODO: Does the task generate the communication or is it done by a controller for examples
    // the direct comm between task 0 and 1 is it done by the container or the task
    // TODO: if the task creates the dataflowop does the task progress it or the executor
    // TODO : FOR NOW the dataflowop is created at container and sent to task
    LinkedQueue<Message> pongQueue = new LinkedQueue<Message>();
    taskExecutor.registerQueue(0, pongQueue);
    direct = channel.direct(newCfg, MessageType.OBJECT, 0, sources, dests, new PingPongReceive());
    taskExecutor.initCommunication(channel, direct);
    // Memory Manager
    if (containerId == 0) {
        byte[] val = Longs.toByteArray(1231212121213L);
        byte[] val2 = Longs.toByteArray(22222222L);
        ByteBuffer valbuf = ByteBuffer.allocateDirect(8192);
        memoryManager.put(0, "temp", valbuf);
    // memoryManager.put(0, "temp", val);
    // memoryManager.put(0, "temp", val2);
    // the map thread where data is produced
    // LOG.log(Level.INFO, "Starting map thread");
    // SourceTask<Object> mapTask = new MapWorker(0, direct);
    // mapTask.setMemoryManager(memoryManager);
    // taskExecutor.registerTask(mapTask);
    // taskExecutor.submitTask(0);
    // taskExecutor.progres();
    } else if (containerId == 1) {
        byte[] val3 = Longs.toByteArray(3333333L);
        ByteBuffer val3buf = ByteBuffer.wrap(val3);
        try {
            Thread.sleep(2000);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the caller can still observe the interruption
            Thread.currentThread().interrupt();
            LOG.log(Level.WARNING, "Interrupted while sleeping before reading from the memory manager", e);
        }
        ByteBuffer results = memoryManager.get(0, "temp");
        if (results.limit() == 8192) {
            System.out.println("Correct " + results.limit());
        }
        ByteBuffer valbuf2 = ByteBuffer.allocateDirect(16192);
        memoryManager.put(0, "temp", valbuf2);
        results = memoryManager.get(0, "temp");
        if (results.limit() == 16192) {
            System.out.println("Correct " + results.limit());
        }
        ByteBuffer results2 = memoryManager.get(0, "temp");
        ByteBuffer results3 = memoryManager.get(0, "temp");
        if (results2 == null) {
            System.out.println("Missing key is null");
        }
        // A lookup may return null (see the check above), so guard before reading the value
        if (results3 != null && results3.getLong() == 1231212121213L) {
            System.out.println("Long value is correct");
        }
        memoryManager.append(0, "temp", val3buf);
        ByteBuffer resultsappend = memoryManager.get(0, "temp");
        System.out.println("Long value 1 :" + resultsappend.getLong());
        System.out.println("Long value 1 :" + resultsappend.getLong());
    // ArrayList<Integer> inq = new ArrayList<>();
    // inq.add(0);
    // taskExecutor.setTaskMessageProcessLimit(10000);
    // SinkTask<Object> recTask = new RecieveWorker(1);
    // recTask.setMemoryManager(memoryManager);
    // taskExecutor.registerSinkTask(recTask, inq);
    // taskExecutor.progres();
    }
}
Also used : Path(edu.iu.dsc.tws.data.fs.Path) Message(edu.iu.dsc.tws.task.api.Message) TaskExecutorFixedThread(edu.iu.dsc.tws.task.core.TaskExecutorFixedThread) HashMap(java.util.HashMap) TWSCommunication(edu.iu.dsc.tws.comms.core.TWSCommunication) TaskPlan(edu.iu.dsc.tws.comms.core.TaskPlan) TWSNetwork(edu.iu.dsc.tws.comms.core.TWSNetwork) LinkedQueue(edu.iu.dsc.tws.task.api.LinkedQueue) LMDBMemoryManager(edu.iu.dsc.tws.data.memory.lmdb.LMDBMemoryManager) MemoryManager(edu.iu.dsc.tws.data.memory.MemoryManager) LMDBMemoryManager(edu.iu.dsc.tws.data.memory.lmdb.LMDBMemoryManager) ByteBuffer(java.nio.ByteBuffer) HashSet(java.util.HashSet)

Aggregations

Path (edu.iu.dsc.tws.data.fs.Path)7 LMDBMemoryManager (edu.iu.dsc.tws.data.memory.lmdb.LMDBMemoryManager)4 FileInputSplit (edu.iu.dsc.tws.data.fs.FileInputSplit)3 MemoryManager (edu.iu.dsc.tws.data.memory.MemoryManager)3 ByteBuffer (java.nio.ByteBuffer)3 BlockLocation (edu.iu.dsc.tws.data.fs.BlockLocation)2 FileStatus (edu.iu.dsc.tws.data.fs.FileStatus)2 FileSystem (edu.iu.dsc.tws.data.fs.FileSystem)2 BufferedMemoryManager (edu.iu.dsc.tws.data.memory.BufferedMemoryManager)2 OperationMemoryManager (edu.iu.dsc.tws.data.memory.OperationMemoryManager)2 ArrayList (java.util.ArrayList)2 Config (edu.iu.dsc.tws.common.config.Config)1 TWSCommunication (edu.iu.dsc.tws.comms.core.TWSCommunication)1 TWSNetwork (edu.iu.dsc.tws.comms.core.TWSNetwork)1 TaskPlan (edu.iu.dsc.tws.comms.core.TaskPlan)1 KryoSerializer (edu.iu.dsc.tws.comms.utils.KryoSerializer)1 InputFormat (edu.iu.dsc.tws.data.api.InputFormat)1 TextInputFormatter (edu.iu.dsc.tws.data.api.formatters.TextInputFormatter)1 InputSplit (edu.iu.dsc.tws.data.fs.io.InputSplit)1 InputSplitAssigner (edu.iu.dsc.tws.data.fs.io.InputSplitAssigner)1