Search in sources :

Example 16 with Files

use of java.nio.file.Files in project suite by stupidsing.

the class LibraryMain method run.

/**
 * Scans {@code inputDir} for files with one of the configured extensions,
 * deletes the empty ones, writes a listing file, moves the remaining files
 * into {@code libraryDir} keyed by their hash string, and creates empty
 * marker files under {@code tagsDir} for each tag.
 *
 * @param args command line arguments; not read by this method
 * @return always {@code true}
 */
protected boolean run(String[] args) {
    // pair every matching path with its size, then split on "size is positive"
    Pair<Streamlet2<Path, Long>, Streamlet2<Path, Long>> partition = // 
    FileUtil.findPaths(Paths.get(inputDir)).filter(// 
    path -> fileExtensions.contains(FileUtil.getFileExtension(path))).map2(// 
    path -> Rethrow.ex(() -> Files.size(path))).partition((path, size) -> 0 < size);
    // remove empty files
    // (presumably t1 holds the entries that failed the 0 < size predicate -- verify against Streamlet2.partition)
    partition.t1.sink((path, size) -> {
        try {
            Files.delete(path);
        } catch (IOException ex) {
            Fail.t(ex);
        }
    });
    Streamlet2<Path, FileInfo> path_fileInfos = // 
    partition.t0.map2((path, size) -> {
        BasicFileAttributes attrs = Rethrow.ex(() -> Files.readAttributes(path, BasicFileAttributes.class));
        // get all file information
        // tags = every name element of the path, combined (via cons) with the last-modified instant as a string
        List<String> tags = // 
        Ints_.range(// 
        path.getNameCount()).map(// 
        i -> path.getName(i).toString()).cons(// 
        To.string(attrs.lastModifiedTime().toInstant())).toList();
        FileInfo fileInfo = new FileInfo();
        // NOTE(review): Apache Commons Codec's Md5Crypt.md5Crypt(byte[]) appears to produce a
        // salted "$1$..." crypt string with a random salt, not a plain MD5 digest of the content;
        // such a value is not stable across calls and may contain '/' -- confirm this is intended,
        // since it is used below as a file name.
        fileInfo.md5 = Rethrow.ex(() -> Md5Crypt.md5Crypt(Files.readAllBytes(path)));
        fileInfo.tags = tags;
        return fileInfo;
    });
    // construct file listing
    // NOTE(review): path and md5 are concatenated with no separator between them
    try (OutputStream os = FileUtil.out(inputDir + ".listing");
        PrintWriter pw = new PrintWriter(os)) {
        for (Pair<Path, FileInfo> path_fileInfo : path_fileInfos) pw.println(path_fileInfo.t0 + path_fileInfo.t1.md5);
    } catch (IOException ex) {
        Fail.t(ex);
    }
    // NOTE(review): the result of the pipeline below is discarded; if Streamlet2 is lazily
    // evaluated, the move and tag steps may never run -- confirm against the streamlet library.
    path_fileInfos.map2((path, fileInfo) -> {
        // move file to library, by md5
        // target is libraryDir/<first two characters of md5>/<md5>; an existing target is replaced
        Path path1 = Paths.get(libraryDir, fileInfo.md5.substring(0, 2), fileInfo.md5);
        FileUtil.mkdir(path1.getParent());
        Rethrow.ex(() -> Files.move(path, path1, StandardCopyOption.REPLACE_EXISTING));
        return fileInfo;
    }).concatMap((path, fileInfo) -> Read.from(fileInfo.tags).map(tag -> {
        // add to tag indices
        // opening and immediately closing the stream leaves an empty file at tagsDir/<tag>/<md5>
        Path path1 = Paths.get(tagsDir, tag, fileInfo.md5);
        return Rethrow.ex(() -> {
            Files.newOutputStream(path1).close();
            return Pair.of(tag, fileInfo);
        });
    }));
    return true;
}
Also used : OutputStream(java.io.OutputStream) PrintWriter(java.io.PrintWriter) Md5Crypt(org.apache.commons.codec.digest.Md5Crypt) Read(suite.streamlet.Read) Streamlet2(suite.streamlet.Streamlet2) Files(java.nio.file.Files) ExecutableProgram(suite.util.RunUtil.ExecutableProgram) IOException(java.io.IOException) To(suite.util.To) BasicFileAttributes(java.nio.file.attribute.BasicFileAttributes) RunUtil(suite.util.RunUtil) StandardCopyOption(java.nio.file.StandardCopyOption) Pair(suite.adt.pair.Pair) List(java.util.List) Paths(java.nio.file.Paths) Rethrow(suite.util.Rethrow) FileUtil(suite.os.FileUtil) Ints_(suite.primitive.Ints_) Path(java.nio.file.Path) Fail(suite.util.Fail) Path(java.nio.file.Path) OutputStream(java.io.OutputStream) IOException(java.io.IOException) Streamlet2(suite.streamlet.Streamlet2) BasicFileAttributes(java.nio.file.attribute.BasicFileAttributes) PrintWriter(java.io.PrintWriter)

Example 17 with Files

use of java.nio.file.Files in project judge by zjnu-acm.

the class ExtensionsViewer method main.

/**
 * Prints a map from file extension to the regular files under the current
 * directory that carry it, leaving out every extension already declared by
 * a {@code *.ext} rule in {@code .gitattributes}.
 *
 * <p>Paths whose second name element matches {@code target}, {@code .git},
 * {@code .idea}, {@code .svn} or {@code .settings} are skipped, as are files
 * without an extension.
 *
 * @param args the command line arguments
 * @throws java.io.IOException if walking the directory tree or reading
 *         {@code .gitattributes} fails
 */
@SuppressWarnings("UseOfSystemOutOrSystemErr")
public static void main(String[] args) throws IOException {
    Path path = Paths.get(".");
    Map<String, List<Path>> map;
    // Files.walk keeps directory handles open until the stream is closed
    try (java.util.stream.Stream<Path> walk = Files.walk(path)) {
        map = walk.filter(p -> p.getNameCount() == 1 || !p.getName(1).toString().matches("target|\\.(?:git|idea|svn|settings)")).filter(Files::isRegularFile).filter(pp -> !getExtension(pp).isEmpty()).collect(Collectors.groupingBy(ExtensionsViewer::getExtension));
    }
    // Files.lines keeps the file open until the stream is closed
    try (java.util.stream.Stream<String> lines = Files.lines(path.resolve(".gitattributes"))) {
        // reduce each "*.ext <attributes>" rule to the bare "ext"
        map.keySet().removeIf(lines.map(str -> str.trim()).filter(str -> str.startsWith("*.")).map(str -> str.replaceAll("\\*\\.|\\s.+", "")).collect(Collectors.toSet())::contains);
    }
    System.out.println(map);
}
Also used : Path(java.nio.file.Path) List(java.util.List) Files(java.nio.file.Files) Paths(java.nio.file.Paths) Map(java.util.Map) IOException(java.io.IOException) Path(java.nio.file.Path) Collectors(java.util.stream.Collectors) List(java.util.List) Files(java.nio.file.Files)

Example 18 with Files

use of java.nio.file.Files in project karaf by apache.

the class FilesStream method stream.

/**
 * Resolves a comma-separated list of file names into a stream of Paths.
 * A backslash takes the following character literally. A name holding an
 * unescaped {@code *}, {@code ?}, <code>{</code> or {@code [} is treated as a
 * {@link java.nio.file.FileSystem#getPathMatcher} style pattern and expanded
 * to the matching files; any other name is resolved directly against the
 * directory named by the {@code karaf.etc} system property.
 *
 * @param fileNames list of names
 * @return Paths to the scripts; an empty stream when {@code fileNames} is null
 */
public static Stream<Path> stream(String fileNames) {
    if (fileNames == null) {
        return Stream.empty();
    }
    List<String> literalNames = new ArrayList<>();
    List<String> patterns = new ArrayList<>();
    StringBuilder token = new StringBuilder(fileNames.length());
    boolean isPattern = false;
    boolean afterBackslash = false;
    for (char ch : fileNames.toCharArray()) {
        if (afterBackslash) {
            // escaped character: taken literally, never a separator or wildcard
            token.append(ch);
            afterBackslash = false;
            continue;
        }
        switch (ch) {
            case '\\':
                afterBackslash = true;
                break;
            case ',':
                // separator: flush the token into the list matching its kind
                (isPattern ? patterns : literalNames).add(token.toString());
                isPattern = false;
                token.setLength(0);
                break;
            case '*':
            case '?':
            case '{':
            case '[':
                isPattern = true;
                token.append(ch);
                break;
            default:
                token.append(ch);
                break;
        }
    }
    // trailing token has no separator after it; an empty one is dropped
    if (token.length() > 0) {
        (isPattern ? patterns : literalNames).add(token.toString());
    }
    Path etcDir = Paths.get(System.getProperty("karaf.etc"));
    Stream<Path> resolved = literalNames.stream().map(etcDir::resolve);
    Stream<Path> expanded = patterns.stream().flatMap(pattern -> files(etcDir, pattern));
    return Stream.concat(resolved, expanded);
}
Also used : Path(java.nio.file.Path) Logger(org.slf4j.Logger) FileVisitor(java.nio.file.FileVisitor) Files(java.nio.file.Files) LoggerFactory(org.slf4j.LoggerFactory) IOException(java.io.IOException) BasicFileAttributes(java.nio.file.attribute.BasicFileAttributes) File(java.io.File) ArrayList(java.util.ArrayList) FileVisitResult(java.nio.file.FileVisitResult) List(java.util.List) Stream(java.util.stream.Stream) FileVisitOption(java.nio.file.FileVisitOption) Paths(java.nio.file.Paths) PathMatcher(java.nio.file.PathMatcher) Path(java.nio.file.Path) EnumSet(java.util.EnumSet) ArrayList(java.util.ArrayList)

Example 19 with Files

use of java.nio.file.Files in project zemberek-nlp by ahmetaa.

the class EliminateDuplicates method extractLabeledDocuments.

/**
 * Loads the documents from every regular file under {@code root}, removes
 * duplicate lines inside each document, keeps the documents whose content is
 * longer than 200 characters, and saves a duplicate-free copy of the
 * resulting corpus to {@code outFile}.
 *
 * @param root directory tree to scan; files are processed in path-string order
 * @param outFile destination of the de-duplicated corpus
 * @throws IOException if walking the tree, loading a file or saving fails
 */
private void extractLabeledDocuments(Path root, Path outFile) throws IOException {
    List<Path> files;
    // Files.walk keeps directory handles open until the stream is closed
    try (java.util.stream.Stream<Path> walk = Files.walk(root)) {
        files = walk.filter(s -> s.toFile().isFile()).collect(Collectors.toList());
    }
    files.sort(Comparator.comparing(Path::toString));
    WebCorpus corpus = new WebCorpus("c", "c");
    // the list holds regular files only, so no directory check is needed here
    for (Path file : files) {
        Log.info("Adding %s", file);
        List<WebDocument> doc = WebCorpus.loadDocuments(file);
        for (WebDocument webDocument : doc) {
            webDocument.removeDuplicateLines();
        }
        List<WebDocument> labeled = doc.stream().filter(s -> s.getContentAsString().length() > 200).collect(Collectors.toList());
        corpus.addDocuments(labeled);
    }
    // note: the value logged here is the number of documents in the corpus
    Log.info("Total amount of files = %d", corpus.getDocuments().size());
    WebCorpus noDuplicates = corpus.copyNoDuplicates();
    Log.info("Corpus size = %d, After removing duplicates = %d", corpus.documentCount(), noDuplicates.documentCount());
    Log.info("Saving corpus to %s", outFile);
    noDuplicates.save(outFile, false);
}
Also used : Path(java.nio.file.Path) List(java.util.List) Files(java.nio.file.Files) Paths(java.nio.file.Paths) IOException(java.io.IOException) Log(zemberek.core.logging.Log) Comparator(java.util.Comparator) Path(java.nio.file.Path) Collectors(java.util.stream.Collectors)

Example 20 with Files

use of java.nio.file.Files in project zemberek-nlp by ahmetaa.

the class AutomaticLabelingExperiment method extractLabeledDocuments.

/**
 * Loads the documents from every regular file under {@code root}, keeps the
 * ones that have at least one label and content longer than 200 characters,
 * and saves a duplicate-free copy of the resulting corpus to
 * {@code labeledFile}.
 *
 * @param root directory tree to scan; files are processed in path-string order
 * @param labeledFile destination of the de-duplicated labeled corpus
 * @throws IOException if walking the tree, loading a file or saving fails
 */
private void extractLabeledDocuments(Path root, Path labeledFile) throws IOException {
    List<Path> files;
    // Files.walk keeps directory handles open until the stream is closed
    try (java.util.stream.Stream<Path> walk = Files.walk(root)) {
        files = walk.filter(s -> s.toFile().isFile()).collect(Collectors.toList());
    }
    files.sort(Comparator.comparing(Path::toString));
    WebCorpus corpus = new WebCorpus("label", "label");
    // the list holds regular files only, so no directory check is needed here
    for (Path file : files) {
        Log.info("Adding %s", file);
        List<WebDocument> doc = WebCorpus.loadDocuments(file);
        List<WebDocument> labeled = doc.stream().filter(s -> s.getLabels().size() > 0 && s.getContentAsString().length() > 200).collect(Collectors.toList());
        corpus.addDocuments(labeled);
    }
    // note: the value logged here is the number of documents in the corpus
    Log.info("Total amount of files = %d", corpus.getDocuments().size());
    WebCorpus noDuplicates = corpus.copyNoDuplicates();
    Log.info("Corpus size = %d, After removing duplicates = %d", corpus.documentCount(), noDuplicates.documentCount());
    Log.info("Saving corpus to %s", labeledFile);
    noDuplicates.save(labeledFile, false);
}
Also used : Path(java.nio.file.Path) TurkishSentenceAnalyzer(zemberek.morphology.analysis.tr.TurkishSentenceAnalyzer) Stopwatch(com.google.common.base.Stopwatch) WebCorpus(zemberek.corpus.WebCorpus) Token(org.antlr.v4.runtime.Token) Random(java.util.Random) SentenceAnalysis(zemberek.morphology.analysis.SentenceAnalysis) WebDocument(zemberek.corpus.WebDocument) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) TurkishMorphology(zemberek.morphology.analysis.tr.TurkishMorphology) TurkishTokenizer(zemberek.tokenization.TurkishTokenizer) Log(zemberek.core.logging.Log) Path(java.nio.file.Path) LinkedHashSet(java.util.LinkedHashSet) Histogram(zemberek.core.collections.Histogram) PrintWriter(java.io.PrintWriter) Files(java.nio.file.Files) Z3MarkovModelDisambiguator(zemberek.morphology.ambiguity.Z3MarkovModelDisambiguator) Set(java.util.Set) TurkishLexer(zemberek.tokenization.antlr.TurkishLexer) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) WordAnalysis(zemberek.morphology.analysis.WordAnalysis) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) Turkish(zemberek.morphology.structure.Turkish) Paths(java.nio.file.Paths) ScoredItem(zemberek.core.ScoredItem) Comparator(java.util.Comparator) Collections(java.util.Collections) WebDocument(zemberek.corpus.WebDocument) WebCorpus(zemberek.corpus.WebCorpus)

Aggregations

Files (java.nio.file.Files) 247, IOException (java.io.IOException) 213, Path (java.nio.file.Path) 199, List (java.util.List) 177, Collectors (java.util.stream.Collectors) 157, Paths (java.nio.file.Paths) 135, File (java.io.File) 130, ArrayList (java.util.ArrayList) 117, Map (java.util.Map) 111, Set (java.util.Set) 97, Collections (java.util.Collections) 89, Arrays (java.util.Arrays) 81, Stream (java.util.stream.Stream) 78, HashMap (java.util.HashMap) 75, HashSet (java.util.HashSet) 58, InputStream (java.io.InputStream) 56, Collection (java.util.Collection) 55, Logger (org.slf4j.Logger) 54, Pattern (java.util.regex.Pattern) 53, Optional (java.util.Optional) 51