Search in sources :

Example 16 with FileSystem

use of edu.iu.dsc.tws.api.data.FileSystem in project twister2 by DSC-SPIDAL.

the class FileSystemUtils method getFileSystem.

/**
 * Resolves, instantiates and initializes the file system addressed by the given URI.
 * This overload is the one to use for the Hadoop file system: when the URI scheme equals
 * {@code DataContext.TWISTER2_HDFS_FILESYSTEM}, the implementation is created with the
 * supplied {@code config}; every other supported scheme is created without it.
 *
 * <p>A URI without a scheme is completed with the default scheme
 * ({@code DataConstants.DEFAULT_FILESYSTEM_SCHEME}). If that fails and the default scheme is
 * {@code file}, the path is rewritten as an absolute local {@code file} URI.
 *
 * @param uri the URI identifying the file system; must not be {@code null}
 * @param config configuration handed to the HDFS file system instantiation
 * @return the initialized file system, or {@code null} if the scheme is not supported
 * @throws IOException if {@code uri} is {@code null}, if no scheme can be determined for it,
 *     if it is a {@code file} URI carrying an authority, or if instantiation fails
 */
public static FileSystem getFileSystem(URI uri, Config config) throws IOException {
    // Guard first and without touching the reference: the previous check called
    // toString() on the null URI and therefore failed with a NullPointerException.
    if (uri == null) {
        throw new IOException("The URI must not be null");
    }
    FileSystem fs = null;
    URI asked = uri;
    URI curUri = uri;
    // TODO: check if the sync is actually needed or can be scoped down
    synchronized (SYNCHRONIZATION_OBJECT) {
        if (curUri.getScheme() == null) {
            try {
                if (defaultScheme == null) {
                    defaultScheme = new URI(DataConstants.DEFAULT_FILESYSTEM_SCHEME);
                }
                curUri = new URI(defaultScheme.getScheme(), null, defaultScheme.getHost(), defaultScheme.getPort(), curUri.getPath(), null, null);
            } catch (URISyntaxException e) {
                try {
                    // defaultScheme is still null when parsing the default scheme itself failed;
                    // in that case skip the repair and let the scheme check below report it.
                    if (defaultScheme != null && "file".equals(defaultScheme.getScheme())) {
                        curUri = new URI("file", null, new Path(new File(curUri.getPath()).getAbsolutePath()).toUri().getPath(), null);
                    }
                } catch (URISyntaxException ex) {
                    // we tried to repair it, but could not. report the scheme error
                    throw new IOException("The URI '" + curUri.toString() + "' is not valid.", ex);
                }
            }
        }
        if (curUri.getScheme() == null) {
            throw new IOException("The URI '" + curUri + "' is invalid.\n" + "The fs.default-scheme = " + defaultScheme + ", the requested URI = " + asked + ", and the final URI = " + curUri + ".");
        }
        if (curUri.getScheme().equals("file") && curUri.getAuthority() != null && !curUri.getAuthority().isEmpty()) {
            String supposedUri = "file:///" + curUri.getAuthority() + curUri.getPath();
            throw new IOException("Found local file path with authority '" + curUri.getAuthority() + "' in path '" + curUri.toString() + "'. Hint: Did you forget a slash? (correct path would be '" + supposedUri + "')");
        }
        // TODO : need to add cache that can save FileSystem Objects and return from cache if available
        // TODO: handle when the system is not supported (currently null is returned)
        if (isSupportedScheme(curUri.getScheme())) {
            String fsClass = SUPPORTEDFS.get(curUri.getScheme());
            if (DataContext.TWISTER2_HDFS_FILESYSTEM.equals(curUri.getScheme())) {
                try {
                    fs = instantiateFileSystem(fsClass, config);
                } catch (NoSuchMethodException e) {
                    throw new RuntimeException("No such method to invoke", e);
                } catch (InvocationTargetException e) {
                    throw new RuntimeException("Invocation exception occured", e);
                }
            } else {
                fs = instantiateFileSystem(fsClass);
            }
            fs.initialize(curUri);
        }
    }
    return fs;
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) HadoopFileSystem(edu.iu.dsc.tws.data.hdfs.HadoopFileSystem) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) LocalFileSystem(edu.iu.dsc.tws.data.fs.local.LocalFileSystem) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) File(java.io.File) InvocationTargetException(java.lang.reflect.InvocationTargetException)

Example 17 with FileSystem

use of edu.iu.dsc.tws.api.data.FileSystem in project twister2 by DSC-SPIDAL.

the class FileSystemUtils method instantiateFileSystem.

/**
 * Reflectively creates the file system implementation named by {@code className}, wiring it
 * to a Hadoop file system built from the given configuration.
 *
 * <p>A Hadoop {@code Configuration} is created, the resources located at
 * {@code HdfsDataContext.getHdfsConfigDirectory(config)} and
 * {@code HdfsDataContext.getHdfsDataDirectory(config)} are added to it, and
 * {@code fs.defaultFS} is set to {@code getHdfsURL(config)}. The class is loaded through the
 * system class loader and must expose a public constructor taking
 * {@code (Configuration, org.apache.hadoop.fs.FileSystem)}.
 *
 * @param className fully qualified name of the file system class to instantiate
 * @param config configuration from which the HDFS settings are read
 * @return the newly created file system instance
 * @throws IOException if the class cannot be loaded, instantiated or accessed, or if the
 *     underlying Hadoop file system cannot be obtained
 * @throws NoSuchMethodException if the class has no matching constructor
 * @throws InvocationTargetException if the constructor itself throws
 */
private static FileSystem instantiateFileSystem(String className, Config config) throws IOException, NoSuchMethodException, InvocationTargetException {
    Object newInstance;
    try {
        Configuration conf = new Configuration(true);
        conf.addResource(new org.apache.hadoop.fs.Path(HdfsDataContext.getHdfsConfigDirectory(config)));
        conf.addResource(new org.apache.hadoop.fs.Path(HdfsDataContext.getHdfsDataDirectory(config)));
        conf.set("fs.defaultFS", getHdfsURL(config));
        Class<?> fileSystemClass = ClassLoader.getSystemClassLoader().loadClass(className);
        Constructor<?> classConstructor = fileSystemClass.getConstructor(Configuration.class, org.apache.hadoop.fs.FileSystem.class);
        newInstance = classConstructor.newInstance(conf, org.apache.hadoop.fs.FileSystem.get(conf));
    } catch (InstantiationException e) {
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new IOException("instantiation exception occured:" + e.getMessage(), e);
    } catch (ClassNotFoundException e) {
        throw new IOException("Could not load file system class '" + className + '\'', e);
    } catch (IllegalAccessException e) {
        throw new IOException("Illegal access exception: " + e.getMessage(), e);
    }
    return (FileSystem) newInstance;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HadoopFileSystem(edu.iu.dsc.tws.data.hdfs.HadoopFileSystem) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) LocalFileSystem(edu.iu.dsc.tws.data.fs.local.LocalFileSystem) IOException(java.io.IOException)

Example 18 with FileSystem

use of edu.iu.dsc.tws.api.data.FileSystem in project twister2 by DSC-SPIDAL.

the class FileSystemUtils method getFileSystem.

/**
 * Returns an unsafe file system for the given URI: the implementation registered for the
 * URI's scheme is instantiated (without any configuration) and initialized with the URI.
 *
 * <p>A URI without a scheme is completed with the default scheme
 * ({@code DataConstants.DEFAULT_FILESYSTEM_SCHEME}). If that fails and the default scheme is
 * {@code file}, the path is rewritten as an absolute local {@code file} URI.
 *
 * @param uri the URI identifying the file system; must not be {@code null}
 * @return the initialized file system, or {@code null} if the scheme is not supported
 * @throws IOException if {@code uri} is {@code null}, if no scheme can be determined for it,
 *     if it is a {@code file} URI carrying an authority, or if instantiation fails
 */
public static FileSystem getFileSystem(URI uri) throws IOException {
    // Guard first and without touching the reference: the previous check called
    // toString() on the null URI and therefore failed with a NullPointerException.
    if (uri == null) {
        throw new IOException("The URI must not be null");
    }
    FileSystem fs = null;
    URI asked = uri;
    URI curUri = uri;
    // TODO: check if the sync is actually needed or can be scoped down
    synchronized (SYNCHRONIZATION_OBJECT) {
        if (curUri.getScheme() == null) {
            try {
                if (defaultScheme == null) {
                    defaultScheme = new URI(DataConstants.DEFAULT_FILESYSTEM_SCHEME);
                }
                curUri = new URI(defaultScheme.getScheme(), null, defaultScheme.getHost(), defaultScheme.getPort(), curUri.getPath(), null, null);
            } catch (URISyntaxException e) {
                try {
                    // defaultScheme is still null when parsing the default scheme itself failed;
                    // in that case skip the repair and let the scheme check below report it.
                    if (defaultScheme != null && "file".equals(defaultScheme.getScheme())) {
                        curUri = new URI("file", null, new Path(new File(curUri.getPath()).getAbsolutePath()).toUri().getPath(), null);
                    }
                } catch (URISyntaxException ex) {
                    // we tried to repair it, but could not. report the scheme error
                    throw new IOException("The URI '" + curUri.toString() + "' is not valid.", ex);
                }
            }
        }
        if (curUri.getScheme() == null) {
            throw new IOException("The URI '" + curUri + "' is invalid.\n" + "The fs.default-scheme = " + defaultScheme + ", the requested URI = " + asked + ", and the final URI = " + curUri + ".");
        }
        if (curUri.getScheme().equals("file") && curUri.getAuthority() != null && !curUri.getAuthority().isEmpty()) {
            String supposedUri = "file:///" + curUri.getAuthority() + curUri.getPath();
            throw new IOException("Found local file path with authority '" + curUri.getAuthority() + "' in path '" + curUri.toString() + "'. Hint: Did you forget a slash? (correct path would be '" + supposedUri + "')");
        }
        // TODO : need to add cache that can save FileSystem Objects and return from cache if available
        // TODO: handle when the system is not supported (currently null is returned)
        if (isSupportedScheme(curUri.getScheme())) {
            String fsClass = SUPPORTEDFS.get(curUri.getScheme());
            fs = instantiateFileSystem(fsClass);
            fs.initialize(curUri);
        }
    }
    return fs;
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) HadoopFileSystem(edu.iu.dsc.tws.data.hdfs.HadoopFileSystem) FileSystem(edu.iu.dsc.tws.api.data.FileSystem) LocalFileSystem(edu.iu.dsc.tws.data.fs.local.LocalFileSystem) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) File(java.io.File)

Example 19 with FileSystem

use of edu.iu.dsc.tws.api.data.FileSystem in project twister2 by DSC-SPIDAL.

the class CSVInputPartitioner method sumFilesInDir.

/**
 * Collects the accepted files below {@code path} and returns their combined length.
 *
 * <p>Each entry listed for {@code path} is handled as follows: a directory is descended into
 * recursively only when it passes {@code acceptFile} and {@code enumerateNestedFiles} is set;
 * a regular file that passes {@code acceptFile} is appended to {@code files} and its length is
 * added to the total. Everything else is skipped, optionally with a log entry.
 *
 * @param path the directory whose entries are enumerated
 * @param files output list that receives every accepted file
 * @param logExcludedFiles whether skipped entries are logged at INFO level
 * @return the sum of the lengths of all accepted files
 * @throws IOException if the file system cannot be obtained or listed
 */
long sumFilesInDir(Path path, List<FileStatus> files, boolean logExcludedFiles) throws IOException {
    final FileSystem fs = FileSystemUtils.get(path);
    long length = 0;
    for (FileStatus file : fs.listFiles(path)) {
        if (file.isDir()) {
            if (acceptFile(file) && enumerateNestedFiles) {
                length += sumFilesInDir(file.getPath(), files, logExcludedFiles);
            } else if (logExcludedFiles) {
                LOG.log(Level.INFO, "Directory " + file.getPath().toString() + " did not pass the " + "file-filter and is excluded.");
            }
        } else if (acceptFile(file)) {
            files.add(file);
            length += file.getLen();
        } else if (logExcludedFiles) {
            // This entry is a regular file, so do not report it as a directory.
            LOG.log(Level.INFO, "File " + file.getPath().toString() + " did not pass the file-filter and is excluded.");
        }
    }
    return length;
}
Also used : FileStatus(edu.iu.dsc.tws.api.data.FileStatus) FileSystem(edu.iu.dsc.tws.api.data.FileSystem)

Example 20 with FileSystem

use of edu.iu.dsc.tws.api.data.FileSystem in project twister2 by DSC-SPIDAL.

the class CompleteCSVInputPartitioner method createInputSplits.

/**
 * Builds the input splits for the complete file: for every input file, the requested number
 * of splits is produced, each one starting at offset zero of that file.
 *
 * @param minNumSplits Number of minimal input splits, as a hint.
 * @return the generated splits as an array
 * @throws IllegalArgumentException if {@code minNumSplits} is smaller than 1
 * @throws IOException if the file system or the file metadata cannot be read
 */
@Override
public FileInputSplit<OT>[] createInputSplits(int minNumSplits) throws IOException {
    if (minNumSplits < 1) {
        throw new IllegalArgumentException("Number of input splits has to be at least 1.");
    }
    final int splitsPerFile = Math.max(minNumSplits, this.numSplits);
    final Path rootPath = this.filePath;
    final List<FileInputSplit> result = new ArrayList<>(splitsPerFile);
    final List<FileStatus> inputFiles = new ArrayList<>();

    final FileSystem fs = FileSystemUtils.get(rootPath, config);
    final FileStatus rootStatus = fs.getFileStatus(rootPath);

    // The total length of all input files is the upper bound for a single split.
    final long maxSplitSize;
    if (rootStatus.isDir()) {
        maxSplitSize = sumFilesInDir(rootPath, inputFiles, true);
    } else {
        inputFiles.add(rootStatus);
        maxSplitSize = rootStatus.getLen();
    }

    // Generate the splits
    int nextSplitNumber = 0;
    for (final FileStatus current : inputFiles) {
        final long fileLength = current.getLen();
        final long blockSize = current.getBlockSize();

        // The minimal split size may never exceed the block size of the file.
        final boolean minFitsInBlock = this.minSplitSize <= blockSize;
        if (!minFitsInBlock) {
            LOG.log(Level.WARNING, "Minimal split size of " + this.minSplitSize + " is larger than the block size of " + blockSize + ". Decreasing minimal split size to block size.");
        }
        final long effectiveMinSplitSize = minFitsInBlock ? this.minSplitSize : blockSize;
        final long splitSize = Math.max(effectiveMinSplitSize, Math.min(maxSplitSize, blockSize));

        if (fileLength <= 0) {
            // special case with a file of zero bytes size
            final BlockLocation[] emptyFileBlocks = fs.getFileBlockLocations(current, 0, 0);
            final String[] hosts = emptyFileBlocks.length > 0 ? emptyFileBlocks[0].getHosts() : new String[0];
            int produced = 0;
            while (produced < splitsPerFile) {
                result.add(new CSVInputSplit(nextSplitNumber++, current.getPath(), 0, 0, hosts));
                produced++;
            }
            continue;
        }

        final BlockLocation[] blocks = fs.getFileBlockLocations(current, 0, fileLength);
        Arrays.sort(blocks);
        final long position = 0;
        int blockIndex = 0;
        int produced = 0;
        while (produced < splitsPerFile) {
            blockIndex = getBlockIndexForPosition(blocks, position, splitSize, blockIndex);
            result.add(new CSVInputSplit(nextSplitNumber++, current.getPath(), position, splitSize, blocks[blockIndex].getHosts()));
            produced++;
        }
    }
    return result.toArray(new FileInputSplit[result.size()]);
}
Also used : Path(edu.iu.dsc.tws.api.data.Path) FileStatus(edu.iu.dsc.tws.api.data.FileStatus) ArrayList(java.util.ArrayList) BlockLocation(edu.iu.dsc.tws.api.data.BlockLocation) FileInputSplit(edu.iu.dsc.tws.data.api.splits.FileInputSplit) CSVInputSplit(edu.iu.dsc.tws.data.api.splits.CSVInputSplit) FileSystem(edu.iu.dsc.tws.api.data.FileSystem)

Aggregations

FileSystem (edu.iu.dsc.tws.api.data.FileSystem)26 FileStatus (edu.iu.dsc.tws.api.data.FileStatus)19 Path (edu.iu.dsc.tws.api.data.Path)18 ArrayList (java.util.ArrayList)11 IOException (java.io.IOException)10 BlockLocation (edu.iu.dsc.tws.api.data.BlockLocation)7 FileInputSplit (edu.iu.dsc.tws.data.api.splits.FileInputSplit)7 FSDataOutputStream (edu.iu.dsc.tws.api.data.FSDataOutputStream)4 PrintWriter (java.io.PrintWriter)4 CSVInputSplit (edu.iu.dsc.tws.data.api.splits.CSVInputSplit)3 LocalFileSystem (edu.iu.dsc.tws.data.fs.local.LocalFileSystem)3 HadoopFileSystem (edu.iu.dsc.tws.data.hdfs.HadoopFileSystem)3 File (java.io.File)2 URI (java.net.URI)2 URISyntaxException (java.net.URISyntaxException)2 Random (java.util.Random)2 TaskSchedulerException (edu.iu.dsc.tws.api.compute.exceptions.TaskSchedulerException)1 Config (edu.iu.dsc.tws.api.config.Config)1 BinaryInputSplit (edu.iu.dsc.tws.data.api.splits.BinaryInputSplit)1 DataFileReader (edu.iu.dsc.tws.data.utils.DataFileReader)1