use of java.nio.file.Files in project suite by stupidsing.
the class LibraryMain method run.
/**
 * Scans {@code inputDir} for files whose extension is in {@code fileExtensions},
 * deletes the ones selected as empty, writes a listing file next to the input
 * directory, and then sets up the move of the remaining files into
 * {@code libraryDir} plus the creation of per-tag marker files under
 * {@code tagsDir}.
 *
 * @param args command line arguments; not read by this method
 * @return always {@code true}
 */
protected boolean run(String[] args) {
// pair every matching path with its size, then split on "size is greater than zero"
Pair<Streamlet2<Path, Long>, Streamlet2<Path, Long>> partition = //
FileUtil.findPaths(Paths.get(inputDir)).filter(//
path -> fileExtensions.contains(FileUtil.getFileExtension(path))).map2(//
path -> Rethrow.ex(() -> Files.size(path))).partition((path, size) -> 0 < size);
// remove empty files
// NOTE(review): assumes partition.t1 holds the entries that failed the 0 < size test - confirm against Streamlet2.partition
partition.t1.sink((path, size) -> {
try {
Files.delete(path);
} catch (IOException ex) {
Fail.t(ex);
}
});
// build a FileInfo (md5 + tags) for every entry of the other partition
Streamlet2<Path, FileInfo> path_fileInfos = //
partition.t0.map2((path, size) -> {
BasicFileAttributes attrs = Rethrow.ex(() -> Files.readAttributes(path, BasicFileAttributes.class));
// get all file information
// tags = every name element of the path, combined (via cons) with the last-modified instant as a string
List<String> tags = //
Ints_.range(//
path.getNameCount()).map(//
i -> path.getName(i).toString()).cons(//
To.string(attrs.lastModifiedTime().toInstant())).toList();
FileInfo fileInfo = new FileInfo();
// NOTE(review): if Md5Crypt is the commons-codec class, md5Crypt(byte[]) yields a salted crypt string
// rather than a plain digest of the content - confirm this is the intended identifier
fileInfo.md5 = Rethrow.ex(() -> Md5Crypt.md5Crypt(Files.readAllBytes(path)));
fileInfo.tags = tags;
return fileInfo;
});
// construct file listing
// written to inputDir + ".listing"; each line is the path immediately followed by the md5, with no separator
try (OutputStream os = FileUtil.out(inputDir + ".listing");
PrintWriter pw = new PrintWriter(os)) {
for (Pair<Path, FileInfo> path_fileInfo : path_fileInfos) pw.println(path_fileInfo.t0 + path_fileInfo.t1.md5);
} catch (IOException ex) {
Fail.t(ex);
}
// move the files into the library and create the tag index entries
// NOTE(review): the result of this pipeline is never consumed here; whether the lambdas below actually
// run depends on how eagerly Streamlet2 evaluates map2/concatMap - confirm
path_fileInfos.map2((path, fileInfo) -> {
// move file to library, by md5
// target is libraryDir/<first two characters of md5>/<md5>; an existing target is replaced
Path path1 = Paths.get(libraryDir, fileInfo.md5.substring(0, 2), fileInfo.md5);
FileUtil.mkdir(path1.getParent());
Rethrow.ex(() -> Files.move(path, path1, StandardCopyOption.REPLACE_EXISTING));
return fileInfo;
}).concatMap((path, fileInfo) -> Read.from(fileInfo.tags).map(tag -> {
// add to tag indices
Path path1 = Paths.get(tagsDir, tag, fileInfo.md5);
return Rethrow.ex(() -> {
// opening and immediately closing the stream leaves a marker file at tagsDir/<tag>/<md5>
Files.newOutputStream(path1).close();
return Pair.of(tag, fileInfo);
});
}));
return true;
}
use of java.nio.file.Files in project judge by zjnu-acm.
the class ExtensionsViewer method main.
/**
 * Prints a map from file extension to the regular files under the current
 * directory that carry it, leaving out extensions that already have a
 * {@code *.ext} entry in {@code .gitattributes}.
 *
 * <p>Entries whose second path element is {@code target}, {@code .git},
 * {@code .idea}, {@code .svn} or {@code .settings} are skipped, as are files
 * for which {@code getExtension} returns an empty string.
 *
 * @param args the command line arguments (unused)
 * @throws IOException if walking the directory tree or reading
 *         {@code .gitattributes} fails
 */
@SuppressWarnings("UseOfSystemOutOrSystemErr")
public static void main(String[] args) throws IOException {
    Path path = Paths.get(".");
    Map<String, List<Path>> map;
    // Files.walk keeps directory handles open until the stream is closed.
    try (java.util.stream.Stream<Path> walk = Files.walk(path)) {
        map = walk
                .filter(p -> p.getNameCount() == 1 || !p.getName(1).toString().matches("target|\\.(?:git|idea|svn|settings)"))
                .filter(Files::isRegularFile)
                .filter(pp -> !getExtension(pp).isEmpty())
                .collect(Collectors.groupingBy(ExtensionsViewer::getExtension));
    }
    // Files.lines keeps the file open until the stream is closed.
    try (java.util.stream.Stream<String> lines = Files.lines(path.resolve(".gitattributes"))) {
        // Reduce each "*.ext <attributes>" line to "ext" and drop those extensions from the report.
        map.keySet().removeIf(lines
                .map(str -> str.trim())
                .filter(str -> str.startsWith("*."))
                .map(str -> str.replaceAll("\\*\\.|\\s.+", ""))
                .collect(Collectors.toSet())::contains);
    }
    System.out.println(map);
}
use of java.nio.file.Files in project karaf by apache.
the class FilesStream method stream.
/**
 * Turns a comma-separated list of names into a stream of paths resolved
 * against the directory named by the {@code karaf.etc} system property.
 *
 * <p>A backslash escapes the character that follows it. A name containing an
 * unescaped {@code *}, {@code ?}, <code>{</code> or {@code [} is treated as a
 * pattern in {@link java.nio.file.FileSystem#getPathMatcher} syntax and is
 * expanded through {@code files(...)}; every other name is resolved directly.
 *
 * @param fileNames list of names, may be {@code null}
 * @return Paths to the scripts; an empty stream when {@code fileNames} is {@code null}
 */
public static Stream<Path> stream(String fileNames) {
    if (fileNames == null) {
        return Stream.empty();
    }
    List<String> plainNames = new ArrayList<>();
    List<String> patterns = new ArrayList<>();
    StringBuilder token = new StringBuilder(fileNames.length());
    boolean isPattern = false;
    boolean afterBackslash = false;
    for (char ch : fileNames.toCharArray()) {
        if (afterBackslash) {
            // escaped character: taken literally, never counts as a separator or wildcard
            token.append(ch);
            afterBackslash = false;
            continue;
        }
        if (ch == '\\') {
            afterBackslash = true;
            continue;
        }
        if (ch == ',') {
            // separator: flush the current token into the matching bucket
            (isPattern ? patterns : plainNames).add(token.toString());
            isPattern = false;
            token.setLength(0);
            continue;
        }
        if ("*?{[".indexOf(ch) >= 0) {
            isPattern = true;
        }
        token.append(ch);
    }
    // the trailing token is only kept when it is non-empty
    if (token.length() > 0) {
        (isPattern ? patterns : plainNames).add(token.toString());
    }
    Path etcDir = Paths.get(System.getProperty("karaf.etc"));
    Stream<Path> direct = plainNames.stream().map(etcDir::resolve);
    Stream<Path> expanded = patterns.stream().flatMap(s -> files(etcDir, s));
    return Stream.concat(direct, expanded);
}
use of java.nio.file.Files in project zemberek-nlp by ahmetaa.
the class EliminateDuplicates method extractLabeledDocuments.
/**
 * Loads the documents of every regular file under {@code root}, removes
 * duplicate lines from each document, keeps the documents whose content is
 * longer than 200 characters, and saves a duplicate-free copy of the
 * resulting corpus to {@code outFile}.
 *
 * @param root directory tree to scan for corpus files
 * @param outFile destination of the de-duplicated corpus
 * @throws IOException if walking the tree, loading a file or saving fails
 */
private void extractLabeledDocuments(Path root, Path outFile) throws IOException {
    List<Path> files;
    // Files.walk keeps directory handles open until the stream is closed.
    try (java.util.stream.Stream<Path> walk = Files.walk(root)) {
        files = walk.filter(s -> s.toFile().isFile()).collect(Collectors.toList());
    }
    // Sort by path string so files are processed in a stable order.
    files.sort(Comparator.comparing(Path::toString));
    WebCorpus corpus = new WebCorpus("c", "c");
    // The list already contains regular files only, so no directory check is needed here.
    for (Path file : files) {
        Log.info("Adding %s", file);
        List<WebDocument> doc = WebCorpus.loadDocuments(file);
        for (WebDocument webDocument : doc) {
            webDocument.removeDuplicateLines();
        }
        List<WebDocument> labeled = doc.stream().filter(s -> s.getContentAsString().length() > 200).collect(Collectors.toList());
        corpus.addDocuments(labeled);
    }
    Log.info("Total amount of files = %d", corpus.getDocuments().size());
    WebCorpus noDuplicates = corpus.copyNoDuplicates();
    Log.info("Corpus size = %d, After removing duplicates = %d", corpus.documentCount(), noDuplicates.documentCount());
    Log.info("Saving corpus to %s", outFile);
    noDuplicates.save(outFile, false);
}
use of java.nio.file.Files in project zemberek-nlp by ahmetaa.
the class AutomaticLabelingExperiment method extractLabeledDocuments.
/**
 * Loads the documents of every regular file under {@code root}, keeps the
 * documents that have at least one label and whose content is longer than
 * 200 characters, and saves a duplicate-free copy of the resulting corpus to
 * {@code labeledFile}.
 *
 * @param root directory tree to scan for corpus files
 * @param labeledFile destination of the de-duplicated labeled corpus
 * @throws IOException if walking the tree, loading a file or saving fails
 */
private void extractLabeledDocuments(Path root, Path labeledFile) throws IOException {
    List<Path> files;
    // Files.walk keeps directory handles open until the stream is closed.
    try (java.util.stream.Stream<Path> walk = Files.walk(root)) {
        files = walk.filter(s -> s.toFile().isFile()).collect(Collectors.toList());
    }
    // Sort by path string so files are processed in a stable order.
    files.sort(Comparator.comparing(Path::toString));
    WebCorpus corpus = new WebCorpus("label", "label");
    // The list already contains regular files only, so no directory check is needed here.
    for (Path file : files) {
        Log.info("Adding %s", file);
        List<WebDocument> doc = WebCorpus.loadDocuments(file);
        List<WebDocument> labeled = doc.stream().filter(s -> s.getLabels().size() > 0 && s.getContentAsString().length() > 200).collect(Collectors.toList());
        corpus.addDocuments(labeled);
    }
    Log.info("Total amount of files = %d", corpus.getDocuments().size());
    WebCorpus noDuplicates = corpus.copyNoDuplicates();
    Log.info("Corpus size = %d, After removing duplicates = %d", corpus.documentCount(), noDuplicates.documentCount());
    Log.info("Saving corpus to %s", labeledFile);
    noDuplicates.save(labeledFile, false);
}
Aggregations