Use of edu.stanford.nlp.io.RegExFileFilter in project CoreNLP by stanfordnlp.
From the class AbstractSequenceClassifier, the method makeObjectBankFromFiles.
/**
 * Builds an {@link ObjectBank} over the regular files directly inside {@code baseDir} that are
 * accepted by a {@link RegExFileFilter} compiled from {@code filePattern}.
 *
 * <p>Entries accepted by the filter that are not regular files (for example sub-directories)
 * are skipped. When {@code flags.announceObjectBankEntries} is set, each selected file is
 * logged together with {@code flags.inputEncoding}.
 *
 * @param baseDir directory whose entries are listed
 * @param filePattern regular expression handed to {@link RegExFileFilter}
 * @param readerAndWriter reader/writer passed on to the created {@link ObjectBank}
 * @return an {@link ObjectBankWrapper} around an {@link ObjectBank} reading the selected files
 *     with {@code flags.inputEncoding}
 * @throws RuntimeException if {@code baseDir} cannot be listed, or if no regular file matches
 */
public ObjectBank<List<IN>> makeObjectBankFromFiles(String baseDir, String filePattern, DocumentReaderAndWriter<IN> readerAndWriter) {
  File path = new File(baseDir);
  FileFilter filter = new RegExFileFilter(Pattern.compile(filePattern));
  File[] origFiles = path.listFiles(filter);
  if (origFiles == null) {
    // File.listFiles returns null (not an empty array) when the path is not a directory
    // or an I/O error occurs; fail with a clear message instead of a bare NullPointerException.
    throw new RuntimeException("Cannot list files (not a directory or I/O error): " + baseDir + '\t' + filePattern);
  }
  Collection<File> files = new ArrayList<>();
  for (File file : origFiles) {
    if (file.isFile()) {
      if (flags.announceObjectBankEntries) {
        log.info("Getting data from " + file + " (" + flags.inputEncoding + " encoding)");
      }
      files.add(file);
    }
  }
  if (files.isEmpty()) {
    throw new RuntimeException("No matching files: " + baseDir + '\t' + filePattern);
  }
  // TODO get rid of ObjectBankWrapper
  return new ObjectBankWrapper<>(flags, new ObjectBank<>(new ResettableReaderIteratorFactory(files, flags.inputEncoding), readerAndWriter), knownLCWords);
}
Use of edu.stanford.nlp.io.RegExFileFilter in project CoreNLP by stanfordnlp.
From the class GetPatternsFromDataMultiClass, the method getAllFiles.
/**
 * Expands a {@code ,}- or {@code ;}-separated list of file specifications into concrete files.
 *
 * <p>Each token is handled as follows:
 * <ul>
 *   <li>an existing directory contributes every file yielded by
 *       {@code IOUtils.iterFilesRecursive} with the pattern {@code .*};</li>
 *   <li>any other existing path is added as is;</li>
 *   <li>otherwise the last path component is compiled as a regular expression and used, via
 *       {@link RegExFileFilter}, to list the entries of the token's parent directory (the
 *       current directory when the token has no parent component).</li>
 * </ul>
 *
 * @param file one or more file specifications separated by {@code ,} or {@code ;}
 * @return the collected files, in the order they were encountered; possibly empty
 */
private static List<File> getAllFiles(String file) {
  List<File> allFiles = new ArrayList<>();
  for (String tokfile : file.split("[,;]")) {
    File filef = new File(tokfile);
    if (filef.isDirectory()) {
      Redwood.log(Redwood.DBG, "Will read from directory " + filef);
      for (File f : IOUtils.iterFilesRecursive(filef, java.util.regex.Pattern.compile(".*"))) {
        Redwood.log(ConstantsAndVariables.extremedebug, "Will read from file " + f);
        allFiles.add(f);
      }
    } else if (filef.exists()) {
      Redwood.log(Redwood.DBG, "Will read from file " + filef);
      allFiles.add(filef);
    } else {
      Redwood.log(Redwood.DBG, "trying to read from file " + filef);
      // Is this a pattern?
      RegExFileFilter fileFilter = new RegExFileFilter(java.util.regex.Pattern.compile(filef.getName()));
      // getParentFile() is null for a bare name such as "foo.*\.txt"; slicing the string at the
      // last "/" would throw StringIndexOutOfBoundsException there, so use the current directory.
      File dir = filef.getParentFile();
      if (dir == null) {
        dir = new File(".");
      }
      // listFiles returns null when dir is not a readable directory.
      File[] files = dir.listFiles(fileFilter);
      if (files == null) {
        Redwood.log(Redwood.DBG, "Cannot list directory " + dir + " for pattern " + filef.getName());
      } else {
        allFiles.addAll(Arrays.asList(files));
      }
    }
  }
  return allFiles;
}
Aggregations