Use of org.apache.commons.io.filefilter.WildcardFileFilter in the project stanbol by apache.
Class IndexConfiguration, method processFstConfig.
/**
* Combines the {@link #fstConfig} with the fields actually present in the
* {@link SolrCore} and rebuilds the {@link #corpusInfos} map from the result.
* <p>
* As information for fields is only available when a
* field was actually used by a document stored in the index, one needs to
* inspect the index after every change.
* <p>
* An empty Solr index will result in
* an empty {@link #corpusInfos} map. The first document with a value
* for the English field will cause a {@link CorpusInfo} for the English
* language to be created. As soon as the last document with a label for
* a given language is deleted, the {@link CorpusInfo} for that language
* will also disappear.
* <p>
* Locking: the read lock of {@link #corpusInfoLock} is held while the passed
* version is compared with the stored one. If the versions differ, the write
* lock is acquired and held while the Solr index is inspected and
* {@link #corpusInfos} is rebuilt.
* <p>
* Processing steps:
* <ol>
* <li>(0) read the default parameters (FST name, index field, store field)</li>
* <li>(1) if wildcards are enabled: match every field of the index against
* the default index field and register a {@link CorpusInfo} for each
* enabled language that is not explicitly configured</li>
* <li>(2) process every explicitly included language with its own
* parameters (falling back to the defaults)</li>
* </ol>
* @param indexVersion the current version of the {@link #index} to process
* the FST config for. If it equals the version stored by the previous call
* nothing is processed.
* @param indexReader The {@link AtomicReader} has access to the actual
* fields present in the {@link SolrCore}. It is used to compare field
* configurations in the {@link #fstConfig} with fields present in the Solr
* {@link #index}.
* @return If the index version changed: <code>true</code> if at least one
* {@link CorpusInfo FST configuration} was found while inspecting the Solr
* {@link #index}. If the index version did not change: whether
* {@link #corpusInfos} is non-empty.
*/
private boolean processFstConfig(long indexVersion, AtomicReader indexReader) {
//first check if the Solr index was updated
corpusInfoLock.readLock().lock();
try {
if (indexVersion == this.indexVersion) {
//nothing to do
//NOTE(review): this dereferences corpusInfos, which is null before the
//first full run (see the null check below) - confirm that the initial
//value of this.indexVersion can never equal a real index version
return !corpusInfos.isEmpty();
}
} finally {
corpusInfoLock.readLock().unlock();
}
//NOTE(review): corpusInfos is read here without holding any lock; it is
//only used to choose the "create"/"update" label of the log message
log.debug("> {} FST config for {} (FST dir: {})", corpusInfos == null ? "create" : "update", index.getName(), fstDirectory.getAbsolutePath());
boolean foundCorpus = false;
corpusInfoLock.writeLock().lock();
try {
this.indexVersion = indexVersion;
IndexSchema schema = index.getLatestSchema();
//snapshot of the CorpusInfos of the previous run. Used to re-use existing
//instances for languages where neither indexed nor stored field changed
Map<String, CorpusInfo> corpusInfosCopy;
if (corpusInfos == null) {
//first call
//init the field
corpusInfos = new HashMap<String, CorpusInfo>();
corpusInfosCopy = new HashMap<String, CorpusInfo>();
} else {
corpusInfosCopy = new HashMap<String, CorpusInfo>(corpusInfos);
//clear the old data
corpusInfos.clear();
}
//(0) get basic parameters of the default configuration
log.debug(" - default config");
Map<String, String> defaultParams = fstConfig.getDefaultParameters();
String fstName = defaultParams.get(IndexConfiguration.PARAM_FST);
String indexField = defaultParams.get(IndexConfiguration.PARAM_FIELD);
String storeField = defaultParams.get(IndexConfiguration.PARAM_STORE_FIELD);
if (storeField == null) {
//apply indexField as default if indexField is NOT NULL
//NOTE: this is done BEFORE the default is applied to indexField. So
//storeField stays null if no index field is configured. A null
//storeField is interpreted below as "stored field == indexed field"
storeField = indexField;
}
if (indexField == null) {
//apply the defaults if null
indexField = IndexConfiguration.DEFAULT_FIELD;
}
if (fstName == null) {
//use default
fstName = getDefaultFstFileName(indexField);
}
//These are all fields actually present in the index (as opposed to
//those defined in the schema). This also includes actual instances of
//dynamic field definitions in the schema.
//we need this twice: in step (1) and in step (2)
FieldInfos fieldInfos = indexReader.getFieldInfos();
//NOTE: this only needs to be done if wildcards are enabled in the fstConfig
if (fstConfig.useWildcard()) {
//(1.a) search for present FST files in the FST directory
//NOTE(review): presentFstFiles is filled here but not read anywhere
//else in this method
Map<String, File> presentFstFiles = new HashMap<String, File>();
WildcardFileFilter fstFilter = new WildcardFileFilter(fstName + ".*.fst");
Iterator<File> fstFiles = FileUtils.iterateFiles(fstDirectory, fstFilter, null);
while (fstFiles.hasNext()) {
File fstFile = fstFiles.next();
String fstFileName = fstFile.getName();
//files are named such as "{name}.{lang}.fst": strip ".fst" and take the
//remaining extension as language
String language = FilenameUtils.getExtension(FilenameUtils.getBaseName(fstFileName));
presentFstFiles.put(language, fstFile);
}
//(1.b) iterate over the fields in the Solr index and search for
// matches against the configured indexField name
String fieldWildcard = FieldEncodingEnum.encodeLanguage(indexField, fieldEncoding, "*");
for (FieldInfo fieldInfo : fieldInfos) {
//try to match the field names against the wildcard
if (FilenameUtils.wildcardMatch(fieldInfo.name, fieldWildcard)) {
//for matches parse the language from the field name
String language = FieldEncodingEnum.parseLanguage(fieldInfo.name, fieldEncoding, indexField);
if (//successfully parsed language
language != null && //is the current language enabled?
fstConfig.isLanguage(language) && //is there no explicit configuration for this language? (those are handled in step 2)
!fstConfig.getExplicitlyIncluded().contains(language)) {
//generate the FST file name: "{fstName}[.{language}].fst"
StringBuilder fstFileName = new StringBuilder(fstName);
if (!language.isEmpty()) {
fstFileName.append('.').append(language);
}
fstFileName.append(".fst");
File fstFile = new File(fstDirectory, fstFileName.toString());
//get the FieldType of the field from the Solr schema
FieldType fieldType = schema.getFieldTypeNoEx(fieldInfo.name);
if (fieldType != null) {
//if the fieldType is present
if (runtimeGeneration || fstFile.isFile()) {
//and FST is present or can be created
//we need also to check if the stored field with
//the labels is present
//get the stored Field and check if it is present!
String storeFieldName;
if (storeField == null) {
//storeField == indexField
storeFieldName = fieldInfo.name;
} else {
// check that the storeField is present in the index
storeFieldName = FieldEncodingEnum.encodeLanguage(storeField, fieldEncoding, language);
FieldInfo storedFieldInfos = fieldInfos.fieldInfo(storeFieldName);
if (storedFieldInfos == null) {
log.debug(" ... ignore language {} because Stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, storeFieldName, fieldInfo.name });
//null marks this language as invalid (skipped below)
storeFieldName = null;
}
}
if (storeFieldName != null) {
// == valid configuration
CorpusInfo fstInfo = corpusInfosCopy.get(language);
if (//no CorpusInfo from the previous run
fstInfo == null || //or the indexed field or the stored field has changed
!fstInfo.indexedField.equals(fieldInfo.name) || !fstInfo.storedField.equals(storeFieldName)) {
//a new CorpusInfo instance is needed
CorpusInfo newFstInfo = new CorpusInfo(language, fieldInfo.name, storeFieldName, fieldType, fstFile, runtimeGeneration);
log.debug(" ... {} {} ", fstInfo == null ? "create" : "update", newFstInfo);
addCorpusInfo(newFstInfo);
corpusInfosCopy.put(language, newFstInfo);
} else {
//no change in the SolrIndex ... use the existing CorpusInfo
addCorpusInfo(fstInfo);
}
foundCorpus = true;
}
} else {
log.debug(" ... ignore language {} (field: {}) because " + "FST file '{}' does not exist and runtime creation " + "is deactivated!", new Object[] { language, fieldInfo.name, fstFile.getAbsolutePath() });
}
} else {
log.debug(" ... ignore language {} becuase unknown fieldtype " + "for SolrFied {}", language, fieldInfo.name);
}
}
//else the field matched the wildcard, but has not passed the
//encoding test, the language is not enabled or it is explicitly
//configured
}
//Solr field does not match the field definition in the config
}
// end iterate over all fields in the SolrIndex
}
//(2) process explicit configuration for configured languages
for (String language : fstConfig.getExplicitlyIncluded()) {
//(2.a) get the language specific config (with fallback to default)
Map<String, String> config = fstConfig.getParameters(language);
String langIndexField = config.get(IndexConfiguration.PARAM_FIELD);
String langStoreField = config.get(IndexConfiguration.PARAM_STORE_FIELD);
String langFstFileName = config.get(IndexConfiguration.PARAM_FST);
final boolean langAllowCreation;
final String langAllowCreationString = config.get(IndexConfiguration.PARAM_RUNTIME_GENERATION);
if (langIndexField != null) {
//also consider explicit field names as default for the fst name
if (langFstFileName == null) {
StringBuilder fileName = new StringBuilder(getDefaultFstFileName(langIndexField));
if (!language.isEmpty()) {
fileName.append('.').append(language);
}
fileName.append(".fst");
langFstFileName = fileName.toString();
}
} else {
//no language specific index field: use the default one
langIndexField = indexField;
}
if (langStoreField == null) {
//fallbacks
if (storeField != null) {
//first to default store field
langStoreField = storeField;
} else {
//else to the lang index field
langStoreField = langIndexField;
}
}
if (langFstFileName == null) {
//no fstFileName config
// ... use the default
//NOTE(review): unlike the file name generation above this does not
//special-case an empty language (results in "{fstName}..fst")
langFstFileName = new StringBuilder(fstName).append('.').append(language).append(".fst").toString();
}
if (langAllowCreationString != null) {
langAllowCreation = Boolean.parseBoolean(langAllowCreationString);
} else {
//fallback to the global setting
langAllowCreation = runtimeGeneration;
}
//(2.b) check if the Solr field is present
String encodedLangIndexField = FieldEncodingEnum.encodeLanguage(langIndexField, fieldEncoding, language);
String encodedLangStoreField = FieldEncodingEnum.encodeLanguage(langStoreField, fieldEncoding, language);
FieldInfo langIndexFieldInfo = fieldInfos.fieldInfo(encodedLangIndexField);
if (langIndexFieldInfo != null) {
FieldInfo langStoreFieldInfo = fieldInfos.fieldInfo(encodedLangStoreField);
if (langStoreFieldInfo != null) {
FieldType fieldType = schema.getFieldTypeNoEx(langIndexFieldInfo.name);
if (fieldType != null) {
//(2.c) check the FST file
File langFstFile = new File(fstDirectory, langFstFileName);
if (langFstFile.isFile() || langAllowCreation) {
CorpusInfo langFstInfo = corpusInfosCopy.get(language);
if (//no CorpusInfo from the previous run
langFstInfo == null || //or the indexed field or the stored field has changed
!langFstInfo.indexedField.equals(encodedLangIndexField) || !langFstInfo.storedField.equals(encodedLangStoreField)) {
//a new CorpusInfo instance is needed
CorpusInfo newLangFstInfo = new CorpusInfo(language, encodedLangIndexField, encodedLangStoreField, fieldType, langFstFile, langAllowCreation);
log.debug(" ... {} {} for explicitly configured language", langFstInfo == null ? "create" : "update", newLangFstInfo);
addCorpusInfo(newLangFstInfo);
} else {
//we can use the existing instance
addCorpusInfo(langFstInfo);
}
foundCorpus = true;
} else {
log.debug(" ... ignore explicitly configured language {} (field: {}) because " + "FST file '{}' does not exist and runtime creation " + "is deactivated!", new Object[] { language, langIndexFieldInfo.name, langFstFile.getAbsolutePath() });
}
} else {
log.debug(" ... ignore explicitly configured language {} becuase unknown fieldtype " + "for SolrFied {}", language, langIndexFieldInfo.name);
}
} else {
log.debug(" ... ignore explicitly configured language {} because configured stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, langStoreField, langIndexFieldInfo.name });
}
} else {
log.debug(" ... ignore explicitly configured language {} because configured field {} (encoded: {}) " + "is not present in the SolrIndex!", new Object[] { language, langIndexField, encodedLangIndexField });
}
}
} finally {
corpusInfoLock.writeLock().unlock();
}
return foundCorpus;
}
Use of org.apache.commons.io.filefilter.WildcardFileFilter in the project druid by druid-io.
Class LocalFirehoseFactory, method connect.
/**
 * Creates a {@link Firehose} that reads, line by line, every file found in and
 * beneath {@code baseDir} whose name matches the wildcard {@code filter}.
 * <p>
 * The matching files are collected once when this method is called; the
 * returned firehose then opens them one after the other.
 *
 * @param firehoseParser the parser handed to the returned firehose
 * @return a firehose iterating over the lines of all matching files
 * @throws IOException declared by the overridden method
 * @throws IAE if {@code baseDir} is {@code null}
 * @throws ISE if no file matches the {@code filter}
 */
@Override
public Firehose connect(StringInputRowParser firehoseParser) throws IOException {
    if (baseDir == null) {
        throw new IAE("baseDir is null");
    }
    final File searchRoot = baseDir.getAbsoluteFile();
    log.info("Searching for all [%s] in and beneath [%s]", filter, searchRoot);
    final Collection<File> foundFiles =
        FileUtils.listFiles(searchRoot, new WildcardFileFilter(filter), TrueFileFilter.INSTANCE);
    if (foundFiles == null || foundFiles.isEmpty()) {
        throw new ISE("Found no files to ingest! Check your schema.");
    }
    // Pass the collection as a format argument: file names are arbitrary text and
    // must not end up inside the format string itself (a '%' would be interpreted).
    log.info("Found files: %s", foundFiles);
    final LinkedList<File> files = Lists.newLinkedList(foundFiles);
    return new FileIteratingFirehose(new Iterator<LineIterator>() {
        @Override
        public boolean hasNext() {
            return !files.isEmpty();
        }

        @Override
        public LineIterator next() {
            final File nextFile = files.poll();
            if (nextFile == null) {
                // honour the Iterator contract instead of failing with an NPE
                // from FileUtils.lineIterator(null)
                throw new java.util.NoSuchElementException();
            }
            try {
                return FileUtils.lineIterator(nextFile);
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }, firehoseParser);
}
Use of org.apache.commons.io.filefilter.WildcardFileFilter in the project gocd by gocd.
Class DevelopmentServer, method copyActivatorJarToClassPath.
/**
 * Copies the first {@code go-plugin-activator-*.jar} found in the activator's
 * {@code target/libs} directory to the classpath directory under the fixed
 * name {@code go-plugin-activator.jar} and points
 * {@link SystemEnvironment#PLUGIN_ACTIVATOR_JAR_PATH} to that name.
 * <p>
 * If the directory is missing or contains no matching jar, a message is
 * printed to {@code System.err} and nothing is copied.
 *
 * @throws IOException if copying the jar fails
 */
private static void copyActivatorJarToClassPath() throws IOException {
    final File libsDir = new File("../plugin-infra/go-plugin-activator/target/libs/");
    // File.listFiles returns null if the directory does not exist (or can not be
    // read) and an empty array if no file matches - both must not be indexed.
    final File[] candidates = libsDir.listFiles((FileFilter) new WildcardFileFilter("go-plugin-activator-*.jar"));
    new SystemEnvironment().set(SystemEnvironment.PLUGIN_ACTIVATOR_JAR_PATH, "go-plugin-activator.jar");
    final File activatorJar = (candidates == null || candidates.length == 0) ? null : candidates[0];
    if (activatorJar != null && activatorJar.exists()) {
        FileUtils.copyFile(activatorJar, new File(classpath(), "go-plugin-activator.jar"));
    } else {
        System.err.println("Could not find plugin activator jar, Plugin framework will not be loaded.");
    }
}
Use of org.apache.commons.io.filefilter.WildcardFileFilter in the project jersey by jersey.
Class FilePatternDoesNotExistRule, method execute.
/**
 * Fails the build if any file matching one of the configured wildcard
 * patterns exists.
 * <p>
 * Each entry of {@code files} is treated as a path whose last segment may
 * contain {@code *} wildcards. The part before the last separator preceding
 * the first wildcard is taken as the directory, the rest as the file name
 * pattern. Entries whose directory does not exist (or can not be listed) are
 * skipped. An entry without any wildcard is matched as a literal file name;
 * an entry without a directory part is resolved against the current directory.
 *
 * @param helper the enforcer helper (not used by this rule)
 * @throws EnforcerRuleException if at least one matching file was found
 */
public void execute(EnforcerRuleHelper helper) throws EnforcerRuleException {
    if (files == null) {
        return;
    }
    for (File file : files) {
        final String filePath = file.getPath();
        // indexOf returns -1 if there is no wildcard: use the whole path then
        final int wildcardIndex = filePath.indexOf("*");
        final String prefix = wildcardIndex < 0 ? filePath : filePath.substring(0, wildcardIndex);
        // lastIndexOf returns -1 if the pattern has no directory part
        final int separatorIndex = prefix.lastIndexOf(File.separator);
        final String dirName = separatorIndex < 0 ? "." : prefix.substring(0, separatorIndex);
        final String fileItselfPattern = separatorIndex < 0 ? filePath : filePath.substring(separatorIndex + 1);
        final File dir = new File(dirName);
        if (!dir.isDirectory()) {
            continue;
        }
        // listFiles returns null on an I/O error (e.g. unreadable directory)
        final File[] matches = dir.listFiles((FileFilter) new WildcardFileFilter(fileItselfPattern));
        if (matches == null) {
            continue;
        }
        // sorted set: deterministic order of the files in the error message
        final Set<File> matchedFiles = new TreeSet<>(Arrays.asList(matches));
        if (!matchedFiles.isEmpty()) {
            throw new EnforcerRuleException("Files found! " + Arrays.toString(matchedFiles.toArray()));
        }
    }
}
Use of org.apache.commons.io.filefilter.WildcardFileFilter in the project hadoop by apache.
Class TestLeveldbTimelineStore, method testLevelDbRepair.
/**
 * Verifies that a LevelDb repair is attempted exactly once during
 * initialization of the {@link LeveldbTimelineStore} when opening the
 * database fails the first time, and that a backup file is left behind in
 * the database directory.
 */
@Test
public void testLevelDbRepair() throws IOException {
    LeveldbTimelineStore timelineStore = new LeveldbTimelineStore();
    // factory whose first open() fails; the second one calls the real method
    JniDBFactory dbFactory = Mockito.mock(JniDBFactory.class);
    Mockito.when(dbFactory.open(Mockito.any(File.class), Mockito.any(Options.class)))
        .thenThrow(new IOException())
        .thenCallRealMethod();
    timelineStore.setFactory(dbFactory);

    // Create the LevelDb in a different location
    String dirName = this.getClass().getSimpleName() + "-tmpDir1";
    File dbDir = new File("target", dirName).getAbsoluteFile();
    Configuration repairConf = new Configuration(this.config);
    repairConf.set(YarnConfiguration.TIMELINE_SERVICE_LEVELDB_PATH, dbDir.getAbsolutePath());
    try {
        timelineStore.init(repairConf);
        // the failed open must have triggered exactly one repair
        Mockito.verify(dbFactory, Mockito.times(1))
            .repair(Mockito.any(File.class), Mockito.any(Options.class));
        // and the repair must have left at least one backup behind
        String backupPattern = "*" + LeveldbTimelineStore.BACKUP_EXT + "*";
        FileFilter backupFilter = new WildcardFileFilter(backupPattern);
        File[] backups = dbDir.listFiles(backupFilter);
        Assert.assertTrue(backups.length > 0);
    } finally {
        timelineStore.close();
        fsContext.delete(new Path(dbDir.getAbsolutePath()), true);
    }
}
Aggregations