Search in sources :

Example 46 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class DataNodeLocatorUtils method findDataNodesLocation.

/**
 * Receives the input file name of a vertex and returns the list of data nodes associated with
 * that file.
 *
 * <p>The file system for the path is obtained through {@code FileSystemUtils.get} and the value
 * stored in the config under {@code DataObjectConstants.FILE_SYSTEM} selects the lookup:
 * <ul>
 *   <li>{@code DataContext.TWISTER2_HDFS_FILESYSTEM}: the result of {@code getDataNodes()} is
 *       returned.</li>
 *   <li>{@code DataContext.TWISTER2_LOCAL_FILESYSTEM}: the host name of the local machine is
 *       returned as the only entry.</li>
 * </ul>
 * In both cases the list is only populated when the path reported by the file status is not
 * null or empty. For any other configured value an empty list is returned.
 *
 * @param inputFileName name (path) of the input file to look up
 * @return datanodes list, possibly empty but never {@code null}
 * @throws RuntimeException if an {@link IOException} is raised while accessing the file system
 *     or resolving the local host; the original exception is attached as the cause
 */
public List<String> findDataNodesLocation(String inputFileName) {
    List<String> dataNodes = new ArrayList<>();
    FileSystem fileSystem;
    try {
        Path path = new Path(inputFileName);
        fileSystem = FileSystemUtils.get(path.toUri(), config);
        if (config.get(DataObjectConstants.FILE_SYSTEM).equals(DataContext.TWISTER2_HDFS_FILESYSTEM)) {
            FileStatus fileStatus = fileSystem.getFileStatus(path);
            if (!fileStatus.getPath().isNullOrEmpty()) {
                dataNodes = getDataNodes();
            }
        } else if (config.get(DataObjectConstants.FILE_SYSTEM).equals(DataContext.TWISTER2_LOCAL_FILESYSTEM)) {
            FileStatus fileStatus = fileSystem.getFileStatus(path);
            if (!fileStatus.getPath().isNullOrEmpty()) {
                String datanodeName = InetAddress.getLocalHost().getHostName();
                dataNodes.add(datanodeName);
            }
        }
    } catch (IOException ioe) {
        // Keep the original exception as the cause so the real failure is not lost.
        throw new RuntimeException("IOException Occured", ioe);
    }
    return dataNodes;
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FileStatus(edu.iu.dsc.tws.api.data.FileStatus) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) ArrayList(java.util.ArrayList) IOException(java.io.IOException)

Example 47 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class FileSystemUtils method getFileSystem.

/**
 * Creates and initializes a {@link FileSystem} for the given URI, passing the config to the
 * file system when the scheme is {@code DataContext.TWISTER2_HDFS_FILESYSTEM} (hadoop file
 * system).
 *
 * <p>If the URI has no scheme, the default scheme parsed from
 * {@code DataConstants.DEFAULT_FILESYSTEM_SCHEME} is applied. If the scheme is not supported
 * (see {@code isSupportedScheme}) no file system is created and {@code null} is returned.
 *
 * @param uri the URI identifying the file system; must not be {@code null}
 * @param config the configuration handed to the file system for the HDFS scheme
 * @return the initialized file system, or {@code null} when the scheme is not supported
 * @throws IOException if the URI is {@code null}, has no scheme after applying the default, is
 *     a local file URI that carries an authority, or if creating/initializing the file system
 *     fails
 * @throws RuntimeException if the HDFS file system cannot be instantiated reflectively
 */
public static FileSystem getFileSystem(URI uri, Config config) throws IOException {
    FileSystem fs = null;
    URI asked = uri;
    URI curUri = uri;
    if (curUri == null) {
        // Do not dereference the null URI while building the message; string concatenation
        // renders it as "null" and the intended IOException is thrown instead of an NPE.
        throw new IOException("The URI " + curUri + " is not a vaild URI");
    }
    // TODO: check if the sycn is actually needed or can be scoped down
    synchronized (SYNCHRONIZATION_OBJECT) {
        if (curUri.getScheme() == null) {
            try {
                if (defaultScheme == null) {
                    defaultScheme = new URI(DataConstants.DEFAULT_FILESYSTEM_SCHEME);
                }
                curUri = new URI(defaultScheme.getScheme(), null, defaultScheme.getHost(), defaultScheme.getPort(), curUri.getPath(), null, null);
            } catch (URISyntaxException e) {
                try {
                    // defaultScheme is still null when parsing the default scheme itself failed,
                    // and its scheme may be null; guard both so the scheme check below reports
                    // the problem as an IOException.
                    if (defaultScheme != null && "file".equals(defaultScheme.getScheme())) {
                        curUri = new URI("file", null, new Path(new File(curUri.getPath()).getAbsolutePath()).toUri().getPath(), null);
                    }
                } catch (URISyntaxException ex) {
                    // we tried to repair it, but could not. report the scheme error
                    throw new IOException("The URI '" + curUri.toString() + "' is not valid.");
                }
            }
        }
        if (curUri.getScheme() == null) {
            throw new IOException("The URI '" + curUri + "' is invalid.\n" + "The fs.default-scheme = " + defaultScheme + ", the requested URI = " + asked + ", and the final URI = " + curUri + ".");
        }
        if (curUri.getScheme().equals("file") && curUri.getAuthority() != null && !curUri.getAuthority().isEmpty()) {
            String supposedUri = "file:///" + curUri.getAuthority() + curUri.getPath();
            throw new IOException("Found local file path with authority '" + curUri.getAuthority() + "' in path '" + curUri.toString() + "'. Hint: Did you forget a slash? (correct path would be '" + supposedUri + "')");
        }
        // TODO : need to add cache that can save FileSystem Objects and return from cache if available
        if (!isSupportedScheme(curUri.getScheme())) {
        // TODO: handle when the system is not supported (currently null is returned)
        } else {
            String fsClass = SUPPORTEDFS.get(curUri.getScheme());
            if (DataContext.TWISTER2_HDFS_FILESYSTEM.equals(curUri.getScheme())) {
                try {
                    fs = instantiateFileSystem(fsClass, config);
                } catch (NoSuchMethodException e) {
                    throw new RuntimeException("No such method to invoke", e);
                } catch (InvocationTargetException e) {
                    throw new RuntimeException("Invocation exception occured", e);
                }
                fs.initialize(curUri);
            } else {
                fs = instantiateFileSystem(fsClass);
                fs.initialize(curUri);
            }
        }
    }
    return fs;
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) HadoopFileSystem(edu.iu.dsc.tws.data.hdfs.HadoopFileSystem) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) LocalFileSystem(edu.iu.dsc.tws.data.fs.local.LocalFileSystem) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) File(java.io.File) InvocationTargetException(java.lang.reflect.InvocationTargetException)

Example 48 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class FileSystemUtils method getFileSystem.

/**
 * Returns a unsafe filesystem for the given uri.
 *
 * <p>If the URI has no scheme, the default scheme parsed from
 * {@code DataConstants.DEFAULT_FILESYSTEM_SCHEME} is applied. If the scheme is not supported
 * (see {@code isSupportedScheme}) no file system is created and {@code null} is returned.
 *
 * @param uri the URI identifying the file system; must not be {@code null}
 * @return the initialized file system, or {@code null} when the scheme is not supported
 * @throws IOException if the URI is {@code null}, has no scheme after applying the default, is
 *     a local file URI that carries an authority, or if creating/initializing the file system
 *     fails
 */
public static FileSystem getFileSystem(URI uri) throws IOException {
    FileSystem fs = null;
    URI asked = uri;
    URI curUri = uri;
    if (curUri == null) {
        // Do not dereference the null URI while building the message; string concatenation
        // renders it as "null" and the intended IOException is thrown instead of an NPE.
        throw new IOException("The URI " + curUri + " is not a vaild URI");
    }
    // TODO: check if the sycn is actually needed or can be scoped down
    synchronized (SYNCHRONIZATION_OBJECT) {
        if (curUri.getScheme() == null) {
            try {
                if (defaultScheme == null) {
                    defaultScheme = new URI(DataConstants.DEFAULT_FILESYSTEM_SCHEME);
                }
                curUri = new URI(defaultScheme.getScheme(), null, defaultScheme.getHost(), defaultScheme.getPort(), curUri.getPath(), null, null);
            } catch (URISyntaxException e) {
                try {
                    // defaultScheme is still null when parsing the default scheme itself failed,
                    // and its scheme may be null; guard both so the scheme check below reports
                    // the problem as an IOException.
                    if (defaultScheme != null && "file".equals(defaultScheme.getScheme())) {
                        curUri = new URI("file", null, new Path(new File(curUri.getPath()).getAbsolutePath()).toUri().getPath(), null);
                    }
                } catch (URISyntaxException ex) {
                    // we tried to repair it, but could not. report the scheme error
                    throw new IOException("The URI '" + curUri.toString() + "' is not valid.");
                }
            }
        }
        if (curUri.getScheme() == null) {
            throw new IOException("The URI '" + curUri + "' is invalid.\n" + "The fs.default-scheme = " + defaultScheme + ", the requested URI = " + asked + ", and the final URI = " + curUri + ".");
        }
        if (curUri.getScheme().equals("file") && curUri.getAuthority() != null && !curUri.getAuthority().isEmpty()) {
            String supposedUri = "file:///" + curUri.getAuthority() + curUri.getPath();
            throw new IOException("Found local file path with authority '" + curUri.getAuthority() + "' in path '" + curUri.toString() + "'. Hint: Did you forget a slash? (correct path would be '" + supposedUri + "')");
        }
        // TODO : need to add cache that can save FileSystem Objects and return from cache if available
        if (!isSupportedScheme(curUri.getScheme())) {
        // TODO: handle when the system is not supported (currently null is returned)
        } else {
            String fsClass = SUPPORTEDFS.get(curUri.getScheme());
            fs = instantiateFileSystem(fsClass);
            fs.initialize(curUri);
        }
    }
    return fs;
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) HadoopFileSystem(edu.iu.dsc.tws.data.hdfs.HadoopFileSystem) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) LocalFileSystem(edu.iu.dsc.tws.data.fs.local.LocalFileSystem) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) File(java.io.File)

Example 49 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class CompleteCSVInputPartitioner method createInputSplits.

/**
 * Creates the splits for the complete file. For every input file, the requested number of
 * splits is produced and each of those splits starts at offset 0 of the file.
 *
 * @param minNumSplits Number of minimal input splits, as a hint.
 * @return the created splits as an array
 * @throws IllegalArgumentException if {@code minNumSplits} is smaller than 1
 * @throws IOException if a file system call fails
 */
@Override
public FileInputSplit<OT>[] createInputSplits(int minNumSplits) throws IOException {
    if (minNumSplits < 1) {
        throw new IllegalArgumentException("Number of input splits has to be at least 1.");
    }

    // honour the larger of the requested and the configured number of splits
    final int splitsPerFile = Math.max(minNumSplits, this.numSplits);
    final Path inputPath = this.filePath;
    final List<FileInputSplit> result = new ArrayList<>(splitsPerFile);
    final List<FileStatus> inputFiles = new ArrayList<>();

    final FileSystem fileSystem = FileSystemUtils.get(inputPath, config);
    final FileStatus rootStatus = fileSystem.getFileStatus(inputPath);

    // collect the files to split together with their accumulated length
    long totalBytes = 0;
    if (!rootStatus.isDir()) {
        inputFiles.add(rootStatus);
        totalBytes += rootStatus.getLen();
    } else {
        totalBytes += sumFilesInDir(inputPath, inputFiles, true);
    }
    final long upperSplitBound = totalBytes;

    int nextSplitId = 0;
    for (final FileStatus current : inputFiles) {
        final long fileLength = current.getLen();
        final long blockSize = current.getBlockSize();

        // the minimal split size is capped by the block size of the file
        final long effectiveMinSplit;
        if (this.minSplitSize > blockSize) {
            LOG.log(Level.WARNING, "Minimal split size of " + this.minSplitSize + " is larger than the block size of " + blockSize + ". Decreasing minimal split size to block size.");
            effectiveMinSplit = blockSize;
        } else {
            effectiveMinSplit = this.minSplitSize;
        }
        final long splitSize = Math.max(effectiveMinSplit, Math.min(upperSplitBound, blockSize));

        if (fileLength <= 0) {
            // special case with a file of zero bytes size
            final BlockLocation[] emptyFileBlocks = fileSystem.getFileBlockLocations(current, 0, 0);
            final String[] hosts = emptyFileBlocks.length > 0 ? emptyFileBlocks[0].getHosts() : new String[0];
            for (int n = 0; n < splitsPerFile; n++) {
                result.add(new CSVInputSplit(nextSplitId++, current.getPath(), 0, 0, hosts));
            }
            continue;
        }

        // sorted block locations are required for the block index lookup
        final BlockLocation[] locations = fileSystem.getFileBlockLocations(current, 0, fileLength);
        Arrays.sort(locations);

        // every split of the file starts at the beginning of the file
        final long offset = 0;
        int blockCursor = 0;
        for (int n = 0; n < splitsPerFile; n++) {
            blockCursor = getBlockIndexForPosition(locations, offset, splitSize, blockCursor);
            result.add(new CSVInputSplit(nextSplitId++, current.getPath(), offset, splitSize, locations[blockCursor].getHosts()));
        }
    }
    return result.toArray(new FileInputSplit[result.size()]);
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FileStatus(edu.iu.dsc.tws.api.data.FileStatus) ArrayList(java.util.ArrayList) BlockLocation(edu.iu.dsc.tws.api.data.BlockLocation) FileInputSplit(edu.iu.dsc.tws.data.api.splits.FileInputSplit) CSVInputSplit(edu.iu.dsc.tws.data.api.splits.CSVInputSplit) FileSystem(edu.iu.dsc.tws.api.data.FileSystem)

Example 50 with Path

use of edu.iu.dsc.tws.api.data.Path in project twister2 by DSC-SPIDAL.

the class FileInputPartitioner method createInputSplits.

/**
 * Computes the input splits for the file. By default, one file block is one split. If more splits
 * are requested than blocks are available, then a split may be a fraction of a
 * block and splits may cross block boundaries.
 *
 * @param minNumSplits The minimum desired number of file splits.
 * @return The computed file splits.
 * @throws IllegalArgumentException if {@code minNumSplits} is smaller than 1
 * @throws IOException if a file system call fails
 */
@Override
public FileInputSplit<OT>[] createInputSplits(int minNumSplits) throws IOException {
    if (minNumSplits < 1) {
        throw new IllegalArgumentException("Number of input splits has to be at least 1.");
    }

    // the desired number of splits is the larger of the hint and the configured value
    final int desiredSplits = Math.max(minNumSplits, this.numSplits);
    final Path inputPath = this.filePath;
    final List<FileInputSplit> result = new ArrayList<FileInputSplit>(desiredSplits);

    // all files that take part in the splits
    final List<FileStatus> inputFiles = new ArrayList<FileStatus>();

    final FileSystem fileSystem = FileSystemUtils.get(inputPath, config);
    final FileStatus rootStatus = fileSystem.getFileStatus(inputPath);

    long totalBytes = 0;
    if (!rootStatus.isDir()) {
        // TODO L3: implement test for unsplittable
        // testForUnsplittable(pathFile);
        inputFiles.add(rootStatus);
        totalBytes += rootStatus.getLen();
    } else {
        totalBytes += sumFilesInDir(inputPath, inputFiles, true);
    }

    // TODO L3: Handle if unsplittable
    // TODO L1: check if we can add the i j method when making splits so that the last split is not
    // larger than the other splits

    // ceiling of totalBytes / desiredSplits
    long upperSplitBound = totalBytes / desiredSplits;
    if (totalBytes % desiredSplits != 0) {
        upperSplitBound += 1;
    }

    int nextSplitId = 0;
    for (final FileStatus current : inputFiles) {
        final long fileLength = current.getLen();
        final long blockSize = current.getBlockSize();

        // the minimal split size is capped by the block size of the file
        final long effectiveMinSplit;
        if (this.minSplitSize > blockSize) {
            LOG.log(Level.WARNING, "Minimal split size of " + this.minSplitSize + " is larger than the block size of " + blockSize + ". Decreasing minimal split size to block size.");
            effectiveMinSplit = blockSize;
        } else {
            effectiveMinSplit = this.minSplitSize;
        }

        final long splitSize = Math.max(effectiveMinSplit, Math.min(upperSplitBound, blockSize));
        final long halfSplit = splitSize >>> 1;
        final long lastSplitLimit = (long) (splitSize * MAX_SPLIT_SIZE_DISCREPANCY);

        if (fileLength <= 0) {
            // special case with a file of zero bytes size
            final BlockLocation[] emptyFileBlocks = fileSystem.getFileBlockLocations(current, 0, 0);
            final String[] hosts = emptyFileBlocks.length > 0 ? emptyFileBlocks[0].getHosts() : new String[0];
            result.add(createSplit(nextSplitId++, current.getPath(), 0, 0, hosts));
            continue;
        }

        // block locations must be ordered by offset for the block index lookup
        final BlockLocation[] locations = fileSystem.getFileBlockLocations(current, 0, fileLength);
        Arrays.sort(locations);

        long remaining = fileLength;
        long offset = 0;
        int blockCursor = 0;

        // emit full-sized splits while more than the tolerated remainder is left
        while (remaining > lastSplitLimit) {
            // pick the block that holds the majority of the split's data
            blockCursor = getBlockIndexForPosition(locations, offset, halfSplit, blockCursor);
            result.add(createSplit(nextSplitId++, current.getPath(), offset, splitSize, locations[blockCursor].getHosts()));
            offset += splitSize;
            remaining -= splitSize;
        }

        // whatever is left becomes the final split of this file
        if (remaining > 0) {
            blockCursor = getBlockIndexForPosition(locations, offset, halfSplit, blockCursor);
            result.add(createSplit(nextSplitId++, current.getPath(), offset, remaining, locations[blockCursor].getHosts()));
        }
    }

    LOG.fine("input splits value:" + result.size() + "\t" + Arrays.toString(result.toArray()));
    return result.toArray(new FileInputSplit[result.size()]);
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FileStatus(edu.iu.dsc.tws.api.data.FileStatus) ArrayList(java.util.ArrayList) BlockLocation(edu.iu.dsc.tws.api.data.BlockLocation) FileInputSplit(edu.iu.dsc.tws.data.api.splits.FileInputSplit) FileSystem(edu.iu.dsc.tws.api.data.FileSystem)

Aggregations

Path (edu.iu.dsc.tws.api.data.Path)61 IOException (java.io.IOException)23 FileSystem (edu.iu.dsc.tws.api.data.FileSystem)19 FileStatus (edu.iu.dsc.tws.api.data.FileStatus)14 ArrayList (java.util.ArrayList)12 Config (edu.iu.dsc.tws.api.config.Config)11 Twister2RuntimeException (edu.iu.dsc.tws.api.exceptions.Twister2RuntimeException)8 FileInputSplit (edu.iu.dsc.tws.data.api.splits.FileInputSplit)8 ExecutionRuntime (edu.iu.dsc.tws.executor.core.ExecutionRuntime)8 BlockLocation (edu.iu.dsc.tws.api.data.BlockLocation)7 FSDataOutputStream (edu.iu.dsc.tws.api.data.FSDataOutputStream)7 PrintWriter (java.io.PrintWriter)7 File (java.io.File)6 LocalTextInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalTextInputPartitioner)5 Test (org.junit.Test)5 ComputeGraph (edu.iu.dsc.tws.api.compute.graph.ComputeGraph)4 LocalCSVInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalCSVInputPartitioner)4 LocalFixedInputPartitioner (edu.iu.dsc.tws.data.api.formatters.LocalFixedInputPartitioner)4 DataGenerator (edu.iu.dsc.tws.tsched.utils.DataGenerator)4 CSVInputSplit (edu.iu.dsc.tws.data.api.splits.CSVInputSplit)3