Search in sources :

Example 56 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class LMDBMemoryManager method init.

/**
 * Initializes the LMDB environment, opens the default database handle,
 * allocates the key/data buffers and supporting maps, and starts the
 * background writer thread.
 *
 * <p>When no data path has been set (or it is empty),
 * {@code LMDBMemoryManagerContext.DEFAULT_FOLDER_PATH} is used. The target
 * directory is created if it does not exist yet.
 *
 * @return {@code true} once initialization has completed
 * @throws RuntimeException if the data directory cannot be created or any
 *     other runtime failure occurs during setup; the original exception is
 *     attached as the cause
 */
@Override
public boolean init() {
    try {
        if (lmdbDataPath == null || lmdbDataPath.isNullOrEmpty()) {
            lmdbDataPath = new Path(LMDBMemoryManagerContext.DEFAULT_FOLDER_PATH);
        }
        final File path = new File(lmdbDataPath.getPath());
        // File.mkdirs() signals failure only through its return value. Fail fast with a
        // clear message instead of letting the environment open fail later. The trailing
        // isDirectory() check tolerates another thread/process creating it concurrently.
        if (!path.exists() && !path.mkdirs() && !path.isDirectory()) {
            throw new IllegalStateException("Unable to create LMDB data directory " + path.getAbsolutePath());
        }
        final EnvFlags[] envFlags = envFlags(true, false);
        this.env = create().setMapSize(LMDBMemoryManagerContext.MAP_SIZE_LIMIT).setMaxDbs(LMDBMemoryManagerContext.MAX_DB_INSTANCES).setMaxReaders(LMDBMemoryManagerContext.MAX_READERS).open(path, envFlags);
        // NOTE(review): an earlier comment claimed this database supports duplicate values
        // for a single key, but only MDB_CREATE is passed here. In lmdbjava duplicates
        // normally require MDB_DUPSORT as well - confirm the intended behavior.
        db = env.openDbi(LMDBMemoryManagerContext.DB_NAME, MDB_CREATE);
        dbMap = new HashMap<Integer, Dbi<ByteBuffer>>();
        keyBuffer = ByteBuffer.allocateDirect(LMDBMemoryManagerContext.KEY_BUFF_INIT_CAP);
        dataBuffer = ByteBuffer.allocateDirect(LMDBMemoryManagerContext.DATA_BUFF_INIT_CAP);
        dataQueueMap = new HashMap<>();
        LMDBMemoryManager.needsCommitReader = false;
        LMDBMemoryManager.needsCommitWriter = true;
        // The writer runs on its own thread and shares dbMap, dataQueueMap and env with this manager.
        Thread writerThread = new Thread(new LMDBDataWriter(dbMap, dataQueueMap, env));
        writerThread.start();
        // populate readTxnStack
        // readTxns = new Stack<>();
        // for (int i = 0; i < 10; i++) {
        // readTxns.push(env.txnRead());
        // }
        threadReadTxn = new ThreadLocal<>();
        // threadWriteTxn = new ThreadLocal<>();
        // threadWriteCursor = new ThreadLocal<>();
        // threadNeedCommit = new ThreadLocal<>();
        threadappendBuffer = new ThreadLocal<>();
    } catch (RuntimeException e) {
        // Wrap with the configured path for context; the cause is preserved.
        throw new RuntimeException("Error while creating LMDB database at Path " + lmdbDataPath.toString(), e);
    }
    return true;
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) Dbi(org.lmdbjava.Dbi) File(java.io.File) EnvFlags(org.lmdbjava.EnvFlags)

Example 57 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class LocalFileSystem method listFiles.

/**
 * Lists the status entries for the given path on the local file system.
 *
 * <p>A regular file yields a single-element array describing that file.
 * Otherwise one status is resolved for each child name reported by
 * {@code File.list()}.
 *
 * @param f the path to inspect
 * @return the resolved statuses, or {@code null} when the path does not exist
 *     or {@code File.list()} returns {@code null}
 * @throws IOException if one of the underlying lookups fails
 */
@Override
public FileStatus[] listFiles(Path f) throws IOException {
    final File target = pathToFile(f);
    if (!target.exists()) {
        return null;
    }
    if (target.isFile()) {
        return new FileStatus[] { new LocalFileStatus(target, this) };
    }
    final String[] childNames = target.list();
    if (childNames == null) {
        return null;
    }
    final FileStatus[] statuses = new FileStatus[childNames.length];
    int slot = 0;
    for (final String childName : childNames) {
        // Resolve each child relative to the requested path.
        statuses[slot++] = getFileStatus(new Path(f, childName));
    }
    return statuses;
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FileStatus(edu.iu.dsc.tws.api.data.FileStatus) File(java.io.File)

Example 58 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class CompleteArrowInputPartitioner method createInputSplits.

/**
 * Creates the input splits for the complete file (or for every file found
 * under the configured path when it is a directory).
 *
 * <p>The number of splits generated per file is the larger of
 * {@code minNumSplits} and the configured {@code numSplits}. Every split
 * starts at offset 0 of its file.
 *
 * @param minNumSplits requested minimum number of input splits, as a hint;
 *     must be at least 1
 * @return the generated splits
 * @throws IllegalArgumentException if {@code minNumSplits} is less than 1
 * @throws IOException if the file system lookups fail
 */
@Override
public FileInputSplit<OT>[] createInputSplits(int minNumSplits) throws IOException {
    if (minNumSplits < 1) {
        throw new IllegalArgumentException("Number of input splits has to be at least 1.");
    }
    final int splitsPerFile = Math.max(minNumSplits, this.numSplits);
    final Path root = this.filePath;
    final List<FileInputSplit> collected = new ArrayList<>(splitsPerFile);
    final List<FileStatus> candidates = new ArrayList<>();
    final FileSystem fileSystem = FileSystemUtils.get(root, config);
    final FileStatus rootStatus = fileSystem.getFileStatus(root);

    // Total byte count of everything to be read; doubles as the upper bound for a split.
    long totalBytes = 0;
    if (rootStatus.isDir()) {
        totalBytes += sumFilesInDir(root, candidates, true);
    } else {
        candidates.add(rootStatus);
        totalBytes += rootStatus.getLen();
    }

    int nextSplitId = 0;
    for (final FileStatus candidate : candidates) {
        final long fileLen = candidate.getLen();
        final long blockBytes = candidate.getBlockSize();

        // The minimal split size may never exceed the block size of the file.
        final long minBytes;
        if (this.minSplitSize > blockBytes) {
            LOG.log(Level.WARNING, "Minimal split size of " + this.minSplitSize + " is larger than the block size of " + blockBytes + ". Decreasing minimal split size to block size.");
            minBytes = blockBytes;
        } else {
            minBytes = this.minSplitSize;
        }
        final long bytesPerSplit = Math.max(minBytes, Math.min(totalBytes, blockBytes));

        if (fileLen <= 0) {
            // Zero-byte file: emit empty splits, reusing the first block's hosts when available.
            final BlockLocation[] emptyFileBlocks = fileSystem.getFileBlockLocations(candidate, 0, 0);
            final String[] hosts = emptyFileBlocks.length > 0 ? emptyFileBlocks[0].getHosts() : new String[0];
            for (int n = 0; n < splitsPerFile; n++) {
                collected.add(new CSVInputSplit(nextSplitId++, candidate.getPath(), 0, 0, hosts));
            }
            continue;
        }

        final BlockLocation[] locations = fileSystem.getFileBlockLocations(candidate, 0, fileLen);
        Arrays.sort(locations);
        // Every split of this file starts at the beginning of the file.
        final long start = 0;
        int locationIdx = 0;
        for (int n = 0; n < splitsPerFile; n++) {
            locationIdx = getBlockIndexForPosition(locations, start, bytesPerSplit, locationIdx);
            collected.add(new CSVInputSplit(nextSplitId++, candidate.getPath(), start, bytesPerSplit, locations[locationIdx].getHosts()));
        }
    }
    return collected.toArray(new FileInputSplit[collected.size()]);
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FileStatus(edu.iu.dsc.tws.api.data.FileStatus) ArrayList(java.util.ArrayList) BlockLocation(edu.iu.dsc.tws.api.data.BlockLocation) FileInputSplit(edu.iu.dsc.tws.data.api.splits.FileInputSplit) CSVInputSplit(edu.iu.dsc.tws.data.api.splits.CSVInputSplit) FileSystem(edu.iu.dsc.tws.api.data.FileSystem)

Example 59 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class CompleteFileInputPartitioner method createInputSplits.

/**
 * Creates the input splits for the complete file (or for every file found
 * under the configured path when it is a directory).
 *
 * <p>For each file, the number of splits produced equals the larger of
 * {@code minNumSplits} and the configured {@code numSplits}; each split is
 * built through {@code createSplit} and begins at offset 0 of its file.
 *
 * @param minNumSplits requested minimum number of input splits, as a hint;
 *     must be at least 1
 * @return the generated splits
 * @throws IllegalArgumentException if {@code minNumSplits} is less than 1
 * @throws IOException if the file system lookups fail
 */
@Override
public FileInputSplit<OT>[] createInputSplits(int minNumSplits) throws IOException {
    if (minNumSplits < 1) {
        throw new IllegalArgumentException("Number of input splits has to be at least 1.");
    }
    final int perFileCount = Math.max(minNumSplits, this.numSplits);
    final Path sourcePath = this.filePath;
    final List<FileInputSplit> result = new ArrayList<>(perFileCount);
    final List<FileStatus> statuses = new ArrayList<>();
    final FileSystem fileSystem = FileSystemUtils.get(sourcePath, config);
    final FileStatus sourceStatus = fileSystem.getFileStatus(sourcePath);

    // Accumulated size of all input files; also caps the size of a single split.
    long combinedLength = 0;
    if (sourceStatus.isDir()) {
        combinedLength += sumFilesInDir(sourcePath, statuses, true);
    } else {
        statuses.add(sourceStatus);
        combinedLength += sourceStatus.getLen();
    }

    int splitCounter = 0;
    for (final FileStatus status : statuses) {
        final long length = status.getLen();
        final long blockLength = status.getBlockSize();

        // Clamp the configured minimal split size to the block size, warning when it is reduced.
        final long effectiveMin;
        if (this.minSplitSize > blockLength) {
            LOG.log(Level.WARNING, "Minimal split size of " + this.minSplitSize + " is larger than the block size of " + blockLength + ". Decreasing minimal split size to block size.");
            effectiveMin = blockLength;
        } else {
            effectiveMin = this.minSplitSize;
        }
        final long chunkLength = Math.max(effectiveMin, Math.min(combinedLength, blockLength));

        if (length <= 0) {
            // Special case: a file of zero bytes still yields (empty) splits.
            final BlockLocation[] zeroLenBlocks = fileSystem.getFileBlockLocations(status, 0, 0);
            final String[] hosts = zeroLenBlocks.length > 0 ? zeroLenBlocks[0].getHosts() : new String[0];
            for (int k = 0; k < perFileCount; k++) {
                result.add(createSplit(splitCounter++, status.getPath(), 0, 0, hosts));
            }
            continue;
        }

        final BlockLocation[] sortedBlocks = fileSystem.getFileBlockLocations(status, 0, length);
        Arrays.sort(sortedBlocks);
        // The offset stays at the start of the file for every split.
        final long offset = 0;
        int currentBlock = 0;
        for (int k = 0; k < perFileCount; k++) {
            currentBlock = getBlockIndexForPosition(sortedBlocks, offset, chunkLength, currentBlock);
            result.add(createSplit(splitCounter++, status.getPath(), offset, chunkLength, sortedBlocks[currentBlock].getHosts()));
        }
    }
    return result.toArray(new FileInputSplit[result.size()]);
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FileStatus(edu.iu.dsc.tws.api.data.FileStatus) ArrayList(java.util.ArrayList) BlockLocation(edu.iu.dsc.tws.api.data.BlockLocation) FileInputSplit(edu.iu.dsc.tws.data.api.splits.FileInputSplit) FileSystem(edu.iu.dsc.tws.api.data.FileSystem)

Example 60 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class ConstraintTaskExample method execute.

/**
 * Runs the two-graph example on this worker.
 *
 * <p>Reads the input directory, dimension, parallelism and data size from
 * the worker's configuration, generates the input data, then builds and
 * executes two compute graphs in sequence. The output obtained from
 * {@code "firstsink"} of the first graph is registered as the
 * {@code "firstgraphpoints"} input before the second graph is executed.
 * The total elapsed time is logged at the end.
 *
 * @param workerEnv the worker environment supplying the worker id and configuration
 */
@Override
public void execute(WorkerEnvironment workerEnv) {
    final int id = workerEnv.getWorkerId();
    final Config cfg = workerEnv.getConfig();
    final long begin = System.currentTimeMillis();
    LOG.log(Level.INFO, "Task worker starting: " + id);

    final ComputeEnvironment computeEnv = ComputeEnvironment.init(workerEnv);
    final TaskExecutor executor = computeEnv.getTaskExecutor();

    // Job parameters, all read from the configuration as strings.
    final String inputDir = String.valueOf(cfg.get(DataObjectConstants.DINPUT_DIRECTORY));
    final String dimensionText = String.valueOf(cfg.get(DataObjectConstants.DIMENSIONS));
    final int dim = Integer.parseInt(dimensionText);
    final String parallelismText = String.valueOf(cfg.get(DataObjectConstants.PARALLELISM_VALUE));
    final int parallelism = Integer.parseInt(parallelismText);
    final String sizeText = String.valueOf(cfg.get(DataObjectConstants.DSIZE));
    final int dataSize = Integer.parseInt(sizeText);

    // Generate the input data before any graph is built.
    new DataGenerator(cfg, id).generate(new Path(inputDir), dataSize, dim);

    final ComputeGraph graphOne = buildFirstGraph(parallelism, cfg, inputDir, dataSize, dim, "firstgraphpoints", "1");
    final ComputeGraph graphTwo = buildSecondGraph(parallelism, cfg, dim, "firstgraphpoints", "1");

    // Plan and run the first graph, then collect its sink output.
    final ExecutionPlan planOne = executor.plan(graphOne);
    executor.execute(graphOne, planOne);
    final DataObject<Object> sinkOutput = executor.getOutput("firstsink");

    // Plan the second graph, hand it the first graph's output, and run it.
    final ExecutionPlan planTwo = executor.plan(graphTwo);
    executor.addInput("firstgraphpoints", sinkOutput);
    executor.execute(graphTwo, planTwo);

    final long finish = System.currentTimeMillis();
    LOG.info("Total Execution Time: " + (finish - begin));
}
Also used : ComputeEnvironment(edu.iu.dsc.tws.task.ComputeEnvironment) Path(edu.iu.dsc.tws.api.data.Path) TaskExecutor(edu.iu.dsc.tws.task.impl.TaskExecutor) ExecutionPlan(edu.iu.dsc.tws.api.compute.executor.ExecutionPlan) Config(edu.iu.dsc.tws.api.config.Config) JobConfig(edu.iu.dsc.tws.api.JobConfig) ComputeGraph(edu.iu.dsc.tws.api.compute.graph.ComputeGraph) DataObject(edu.iu.dsc.tws.api.dataset.DataObject)

Aggregations

Path (edu.iu.dsc.tws.api.data.Path)61 IOException (java.io.IOException)23 FileSystem (edu.iu.dsc.tws.api.data.FileSystem)19 FileStatus (edu.iu.dsc.tws.api.data.FileStatus)14 ArrayList (java.util.ArrayList)12 Config (edu.iu.dsc.tws.api.config.Config)11 Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)8 FileInputSplit (edu.iu.dsc.tws.data.api.splits.FileInputSplit)8 ExecutionRuntime (edu.iu.dsc.tws.executor.core.ExecutionRuntime)8 BlockLocation (edu.iu.dsc.tws.api.data.BlockLocation)7 FSDataOutputStream (edu.iu.dsc.tws.api.data.FSDataOutputStream)7 PrintWriter (java.io.PrintWriter)7 File (java.io.File)6 LocalTextInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalTextInputPartitioner)5 Test (org.junit.Test)5 ComputeGraph (edu.iu.dsc.tws.api.compute.graph.ComputeGraph)4 LocalCSVInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalCSVInputPartitioner)4 LocalFixedInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalFixedInputPartitioner)4 DataGenerator (edu.iu.dsc.tws.tsched.utils.DataGenerator)4 CSVInputSplit (edu.iu.dsc.tws.data.api.splits.CSVInputSplit)3