Use of org.apache.tika.batch.fs.FSDocumentSelector in the Apache Tika project.
Class FSCrawlerBuilder, method buildSelector.
/**
 * Creates an {@link FSDocumentSelector} configured from the supplied attributes.
 *
 * <p>The include and exclude regular expressions are looked up under
 * {@code INCLUDE_FILE_PAT_ATTR} and {@code EXCLUDE_FILE_PAT_ATTR}; an attribute
 * that is absent or an empty string yields a {@code null} pattern for that slot.
 * The size limits are looked up under {@code MIN_FILE_SIZE_BYTES_ATTR} and
 * {@code MAX_FILE_SIZE_BYTES_ATTR} and converted with {@code PropsUtil.getLong},
 * which is handed {@code -1L} as its second argument (presumably the fallback
 * used when the attribute cannot be read as a long -- confirm against PropsUtil).
 *
 * @param attributes attribute name to raw string value; must not be {@code null}
 * @return a selector built from the compiled patterns and the two size limits
 * @throws java.util.regex.PatternSyntaxException if a non-empty include or
 *         exclude string is not a valid regular expression
 */
private DocumentSelector buildSelector(Map<String, String> attributes) {
    final String includeRegex = attributes.get(INCLUDE_FILE_PAT_ATTR);
    final String excludeRegex = attributes.get(EXCLUDE_FILE_PAT_ATTR);

    // Size limits are resolved before any pattern is compiled.
    final long maxBytes = PropsUtil.getLong(attributes.get(MAX_FILE_SIZE_BYTES_ATTR), -1L);
    final long minBytes = PropsUtil.getLong(attributes.get(MIN_FILE_SIZE_BYTES_ATTR), -1L);

    // A missing or empty expression leaves the corresponding pattern null.
    Pattern includePattern = null;
    if (includeRegex != null && !includeRegex.isEmpty()) {
        includePattern = Pattern.compile(includeRegex);
    }

    Pattern excludePattern = null;
    if (excludeRegex != null && !excludeRegex.isEmpty()) {
        excludePattern = Pattern.compile(excludeRegex);
    }

    // Constructor takes the minimum size before the maximum size.
    return new FSDocumentSelector(includePattern, excludePattern, minBytes, maxBytes);
}
Aggregations