Search in sources :

Example 11 with WildcardFileFilter

use of org.apache.commons.io.filefilter.WildcardFileFilter in project stanbol by apache.

the class IndexConfiguration method processFstConfig.

/**
 * Combines the {@link #fstConfig} with the fields actually present in the
 * {@link SolrCore} and rebuilds the {@link #corpusInfos} map from the result.
 * <p>
 * Information about a field is only available once the field was actually
 * used by a document stored in the index, so the index needs to be
 * inspected after every change.
 * <p>
 * An empty Solr index will result in an empty {@link #corpusInfos} map.
 * The first document with a value for the English field will cause a
 * {@link CorpusInfo} for the English language to be created. As soon as the
 * last document with a label for a given language is deleted, the
 * {@link CorpusInfo} for that language will also disappear.
 * <p>
 * Locking: the method first takes the read lock of {@link #corpusInfoLock}
 * to compare the passed version with the stored one and returns without
 * rebuilding when they are equal. Otherwise it takes the write lock for the
 * whole rebuild (store the new version, snapshot and clear the old entries,
 * process the wildcard configuration, then the explicitly configured
 * languages).
 *
 * @param indexVersion the current version of the {@link #index} to process
 * the FST config for.
 * @param indexReader The {@link AtomicReader} has access to the actual
 * fields present in the {@link SolrCore}. It is used to compare field
 * configurations in the {@link #fstConfig} with fields present in the Solr
 * {@link #index}.
 * @return {@code true} if at least one {@link CorpusInfo FST configuration}
 * was registered while inspecting the Solr {@link #index}; when the version
 * is unchanged, whether {@link #corpusInfos} is currently non-empty.
 */
private boolean processFstConfig(long indexVersion, AtomicReader indexReader) {
    //first check if the Solr index was updated
    corpusInfoLock.readLock().lock();
    try {
        if (indexVersion == this.indexVersion) {
            //nothing to do
            return !corpusInfos.isEmpty();
        }
    } finally {
        corpusInfoLock.readLock().unlock();
    }
    //NOTE(review): corpusInfos is read here without holding either lock, and
    //the version is not re-checked after the write lock is acquired below
    //- confirm that callers serialize invocations of this method.
    log.debug("> {} FST config for {} (FST dir: {})", corpusInfos == null ? "create" : "update", index.getName(), fstDirectory.getAbsolutePath());
    boolean foundCorpus = false;
    corpusInfoLock.writeLock().lock();
    try {
        this.indexVersion = indexVersion;
        IndexSchema schema = index.getLatestSchema();
        //snapshot of the previous entries; used below to reuse CorpusInfo
        //instances whose indexed/stored field names did not change
        Map<String, CorpusInfo> corpusInfosCopy;
        if (corpusInfos == null) {
            //first call
            //init the field
            corpusInfos = new HashMap<String, CorpusInfo>();
            corpusInfosCopy = new HashMap<String, CorpusInfo>();
        } else {
            corpusInfosCopy = new HashMap<String, CorpusInfo>(corpusInfos);
            //clear the old data
            corpusInfos.clear();
        }
        //(0) get basic parameters of the default configuration
        log.debug(" - default config");
        Map<String, String> defaultParams = fstConfig.getDefaultParameters();
        String fstName = defaultParams.get(IndexConfiguration.PARAM_FST);
        String indexField = defaultParams.get(IndexConfiguration.PARAM_FIELD);
        String storeField = defaultParams.get(IndexConfiguration.PARAM_STORE_FIELD);
        if (storeField == null) {
            //fall back to the configured indexField. This happens BEFORE the
            //default for indexField is applied, so storeField stays null when
            //no index field was configured either.
            storeField = indexField;
        }
        if (indexField == null) {
            //apply the defaults if null
            indexField = IndexConfiguration.DEFAULT_FIELD;
        }
        if (fstName == null) {
            //use default
            fstName = getDefaultFstFileName(indexField);
        }
        //These are all fields actually present in the index (as opposed to
        //those defined in the schema). This also includes actual instances of
        //dynamic field definitions in the schema.
        //we need this twice: for the wildcard step (1) and the explicit step (2)
        FieldInfos fieldInfos = indexReader.getFieldInfos();
        //NOTE: this only needs to be done if wildcards are enabled in the fstConfig
        if (fstConfig.useWildcard()) {
            //(1.a) search for present FST files in the FST directory
            //NOTE(review): presentFstFiles is populated here but not read
            //again within this method.
            Map<String, File> presentFstFiles = new HashMap<String, File>();
            WildcardFileFilter fstFilter = new WildcardFileFilter(fstName + ".*.fst");
            Iterator<File> fstFiles = FileUtils.iterateFiles(fstDirectory, fstFilter, null);
            while (fstFiles.hasNext()) {
                File fstFile = fstFiles.next();
                String fstFileName = fstFile.getName();
                //files are named such as "{name}.{lang}.fst"
                String language = FilenameUtils.getExtension(FilenameUtils.getBaseName(fstFileName));
                presentFstFiles.put(language, fstFile);
            }
            //(1.b) iterate over the fields in the Solr index and search for 
            //      matches against the configured indexField name
            String fieldWildcard = FieldEncodingEnum.encodeLanguage(indexField, fieldEncoding, "*");
            for (FieldInfo fieldInfo : fieldInfos) {
                //try to match the field names against the wildcard
                if (FilenameUtils.wildcardMatch(fieldInfo.name, fieldWildcard)) {
                    //for matches parse the language from the field name
                    String language = FieldEncodingEnum.parseLanguage(fieldInfo.name, fieldEncoding, indexField);
                    if (//successfully parsed language
                    language != null && //is the current language enabled?
                    fstConfig.isLanguage(language) && //is there no explicit configuration for this language?
                    !fstConfig.getExplicitlyIncluded().contains(language)) {
                        //generate the FST file name
                        StringBuilder fstFileName = new StringBuilder(fstName);
                        if (!language.isEmpty()) {
                            fstFileName.append('.').append(language);
                        }
                        fstFileName.append(".fst");
                        File fstFile = new File(fstDirectory, fstFileName.toString());
                        //get the FieldType of the field from the Solr schema
                        FieldType fieldType = schema.getFieldTypeNoEx(fieldInfo.name);
                        if (fieldType != null) {
                            //if the fieldType is present
                            if (runtimeGeneration || fstFile.isFile()) {
                                //and FST is present or can be created
                                //we need also to check if the stored field with
                                //the labels is present
                                //get the stored Field and check if it is present!
                                String storeFieldName;
                                if (storeField == null) {
                                    //storeField == indexField
                                    storeFieldName = fieldInfo.name;
                                } else {
                                    // check that the storeField is present in the index
                                    storeFieldName = FieldEncodingEnum.encodeLanguage(storeField, fieldEncoding, language);
                                    FieldInfo storedFieldInfos = fieldInfos.fieldInfo(storeFieldName);
                                    if (storedFieldInfos == null) {
                                        log.debug(" ... ignore language {} because Stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, storeFieldName, fieldInfo.name });
                                        //null marks the configuration as invalid for the check below
                                        storeFieldName = null;
                                    }
                                }
                                if (storeFieldName != null) {
                                    // == valid configuration
                                    CorpusInfo fstInfo = corpusInfosCopy.get(language);
                                    if (//new one
                                    fstInfo == null || //index field changed
                                    !fstInfo.indexedField.equals(fieldInfo.name) || !fstInfo.storedField.equals(storeFieldName)) {
                                        //(the last operand above checks for a changed store field)
                                        CorpusInfo newFstInfo = new CorpusInfo(language, fieldInfo.name, storeFieldName, fieldType, fstFile, runtimeGeneration);
                                        log.debug(" ... {} {} ", fstInfo == null ? "create" : "update", newFstInfo);
                                        addCorpusInfo(newFstInfo);
                                        corpusInfosCopy.put(language, newFstInfo);
                                    } else {
                                        //no change in the SolrIndex ... use the existing CorpusInfo
                                        addCorpusInfo(fstInfo);
                                    }
                                    foundCorpus = true;
                                }
                            } else {
                                log.debug(" ... ignore language {} (field: {}) because " + "FST file '{}' does not exist and runtime creation " + "is deactivated!", new Object[] { language, fieldInfo.name, fstFile.getAbsolutePath() });
                            }
                        } else {
                            log.debug(" ... ignore language {} becuase unknown fieldtype " + "for SolrFied {}", language, fieldInfo.name);
                        }
                    }
                //else the field matched the wildcard, but has not passed the
                //language/configuration checks.
                }
            //else the Solr field does not match the field definition in the config
            }
        // end iterate over all fields in the SolrIndex
        }
        //(2) process explicit configuration for configured languages
        for (String language : fstConfig.getExplicitlyIncluded()) {
            //(2.a) get the language specific config (with fallback to default)
            Map<String, String> config = fstConfig.getParameters(language);
            String langIndexField = config.get(IndexConfiguration.PARAM_FIELD);
            String langStoreField = config.get(IndexConfiguration.PARAM_STORE_FIELD);
            String langFstFileName = config.get(IndexConfiguration.PARAM_FST);
            final boolean langAllowCreation;
            final String langAllowCreationString = config.get(IndexConfiguration.PARAM_RUNTIME_GENERATION);
            if (langIndexField != null) {
                //also consider explicit field names as default for the fst name
                if (langFstFileName == null) {
                    StringBuilder fileName = new StringBuilder(getDefaultFstFileName(langIndexField));
                    if (!language.isEmpty()) {
                        fileName.append('.').append(language);
                    }
                    fileName.append(".fst");
                    langFstFileName = fileName.toString();
                }
            } else {
                langIndexField = indexField;
            }
            if (langStoreField == null) {
                //fallbacks
                if (storeField != null) {
                    //first to default store field
                    langStoreField = storeField;
                } else {
                    //else to the lang index field
                    langStoreField = langIndexField;
                }
            }
            if (langFstFileName == null) {
                //no fstFileName config
                // ... use the default
                //NOTE(review): unlike the two other places that build an FST
                //file name, this one appends '.' + language without checking
                //language.isEmpty() - confirm whether that is intended.
                langFstFileName = new StringBuilder(fstName).append('.').append(language).append(".fst").toString();
            }
            if (langAllowCreationString != null) {
                langAllowCreation = Boolean.parseBoolean(langAllowCreationString);
            } else {
                //no language specific setting ... use the global one
                langAllowCreation = runtimeGeneration;
            }
            //(2.b) check if the Solr field is present
            String encodedLangIndexField = FieldEncodingEnum.encodeLanguage(langIndexField, fieldEncoding, language);
            String encodedLangStoreField = FieldEncodingEnum.encodeLanguage(langStoreField, fieldEncoding, language);
            FieldInfo langIndexFieldInfo = fieldInfos.fieldInfo(encodedLangIndexField);
            if (langIndexFieldInfo != null) {
                FieldInfo langStoreFieldInfo = fieldInfos.fieldInfo(encodedLangStoreField);
                if (langStoreFieldInfo != null) {
                    FieldType fieldType = schema.getFieldTypeNoEx(langIndexFieldInfo.name);
                    if (fieldType != null) {
                        //(2.c) check the FST file
                        File langFstFile = new File(fstDirectory, langFstFileName);
                        if (langFstFile.isFile() || langAllowCreation) {
                            CorpusInfo langFstInfo = corpusInfosCopy.get(language);
                            if (//new one
                            langFstInfo == null || //index field changed
                            !langFstInfo.indexedField.equals(encodedLangIndexField) || !langFstInfo.storedField.equals(encodedLangStoreField)) {
                                //(the last operand above checks for a changed store field)
                                CorpusInfo newLangFstInfo = new CorpusInfo(language, encodedLangIndexField, encodedLangStoreField, fieldType, langFstFile, langAllowCreation);
                                log.debug("   ... {} {} for explicitly configured language", langFstInfo == null ? "create" : "update", newLangFstInfo);
                                addCorpusInfo(newLangFstInfo);
                            } else {
                                //we can use the existing instance
                                addCorpusInfo(langFstInfo);
                            }
                            foundCorpus = true;
                        } else {
                            log.debug(" ... ignore explicitly configured language {} (field: {}) because " + "FST file '{}' does not exist and runtime creation " + "is deactivated!", new Object[] { language, langIndexFieldInfo.name, langFstFile.getAbsolutePath() });
                        }
                    } else {
                        log.debug(" ... ignore explicitly configured language {} becuase unknown fieldtype " + "for SolrFied {}", language, langIndexFieldInfo.name);
                    }
                } else {
                    log.debug(" ... ignore explicitly configured language {} because configured stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, langStoreField, langIndexFieldInfo.name });
                }
            } else {
                log.debug(" ... ignore explicitly configured language {} because configured field {} (encoded: {}) " + "is not present in the SolrIndex!", new Object[] { language, langIndexField, encodedLangIndexField });
            }
        }
    } finally {
        corpusInfoLock.writeLock().unlock();
    }
    return foundCorpus;
}
Also used : HashMap(java.util.HashMap) WildcardFileFilter(org.apache.commons.io.filefilter.WildcardFileFilter) FieldType(org.apache.solr.schema.FieldType) FieldInfos(org.apache.lucene.index.FieldInfos) IndexSchema(org.apache.solr.schema.IndexSchema) File(java.io.File) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 12 with WildcardFileFilter

use of org.apache.commons.io.filefilter.WildcardFileFilter in project druid by druid-io.

the class LocalFirehoseFactory method connect.

/**
 * Creates a {@link Firehose} that reads, line by line, every file in and
 * beneath {@code baseDir} whose name matches the wildcard {@code filter}.
 *
 * @param firehoseParser parser handed to the returned firehose
 * @return a firehose iterating over the lines of all matching files
 * @throws IOException declared by the overridden method
 * @throws IAE if {@code baseDir} is {@code null}
 * @throws ISE if no file matches the filter
 */
@Override
public Firehose connect(StringInputRowParser firehoseParser) throws IOException {
    if (baseDir == null) {
        throw new IAE("baseDir is null");
    }
    log.info("Searching for all [%s] in and beneath [%s]", filter, baseDir.getAbsoluteFile());
    Collection<File> foundFiles = FileUtils.listFiles(baseDir.getAbsoluteFile(), new WildcardFileFilter(filter), TrueFileFilter.INSTANCE);
    if (foundFiles == null || foundFiles.isEmpty()) {
        throw new ISE("Found no files to ingest! Check your schema.");
    }
    // Pass the collection as a format argument: the logger treats the first
    // argument as a format string, so a '%' inside a file name must not end
    // up in it.
    log.info("Found files: %s", foundFiles);
    final LinkedList<File> files = Lists.newLinkedList(foundFiles);
    return new FileIteratingFirehose(new Iterator<LineIterator>() {

        @Override
        public boolean hasNext() {
            return !files.isEmpty();
        }

        @Override
        public LineIterator next() {
            final File nextFile = files.poll();
            if (nextFile == null) {
                // Honor the Iterator contract instead of failing with a
                // NullPointerException inside FileUtils.lineIterator(null).
                throw new java.util.NoSuchElementException();
            }
            try {
                return FileUtils.lineIterator(nextFile);
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }, firehoseParser);
}
Also used : FileIteratingFirehose(io.druid.data.input.impl.FileIteratingFirehose) ISE(io.druid.java.util.common.ISE) IAE(io.druid.java.util.common.IAE) WildcardFileFilter(org.apache.commons.io.filefilter.WildcardFileFilter) File(java.io.File) LineIterator(org.apache.commons.io.LineIterator) IOException(java.io.IOException)

Example 13 with WildcardFileFilter

use of org.apache.commons.io.filefilter.WildcardFileFilter in project gocd by gocd.

the class DevelopmentServer method copyActivatorJarToClassPath.

/**
 * Copies the first {@code go-plugin-activator-*.jar} found in the plugin
 * activator build directory to the classpath directory under the fixed name
 * {@code go-plugin-activator.jar} and points
 * {@link SystemEnvironment#PLUGIN_ACTIVATOR_JAR_PATH} at that name.
 * <p>
 * If the build directory is missing or contains no matching jar, a message
 * is printed to {@code System.err} and nothing is copied.
 *
 * @throws IOException if copying the jar fails
 */
private static void copyActivatorJarToClassPath() throws IOException {
    // listFiles returns null when the directory does not exist (or cannot be
    // read) and an empty array when nothing matches; both must be handled
    // before indexing into the result.
    File[] activatorJars = new File("../plugin-infra/go-plugin-activator/target/libs/").listFiles((FileFilter) new WildcardFileFilter("go-plugin-activator-*.jar"));
    new SystemEnvironment().set(SystemEnvironment.PLUGIN_ACTIVATOR_JAR_PATH, "go-plugin-activator.jar");
    if (activatorJars != null && activatorJars.length > 0 && activatorJars[0].exists()) {
        FileUtils.copyFile(activatorJars[0], new File(classpath(), "go-plugin-activator.jar"));
    } else {
        System.err.println("Could not find plugin activator jar, Plugin framework will not be loaded.");
    }
}
Also used : SystemEnvironment(com.thoughtworks.go.util.SystemEnvironment) FileFilter(java.io.FileFilter) WildcardFileFilter(org.apache.commons.io.filefilter.WildcardFileFilter) File(java.io.File) WildcardFileFilter(org.apache.commons.io.filefilter.WildcardFileFilter)

Example 14 with WildcardFileFilter

use of org.apache.commons.io.filefilter.WildcardFileFilter in project jersey by jersey.

the class FilePatternDoesNotExistRule method execute.

/**
 * Fails the build if any file matching one of the configured file patterns
 * exists.
 * <p>
 * Each entry of {@code files} is split into a directory part (everything up
 * to the last separator before the first {@code *}) and a file name pattern
 * that is matched against the direct children of that directory. An entry
 * without a {@code *} is matched literally; an entry without a directory
 * part is resolved against the current directory. Entries whose directory
 * does not exist or cannot be listed are skipped.
 *
 * @param helper the enforcer helper (not used by this rule)
 * @throws EnforcerRuleException if at least one matching file is found
 */
public void execute(EnforcerRuleHelper helper) throws EnforcerRuleException {
    if (files == null) {
        return;
    }
    for (File file : files) {
        final String filePath = file.getPath();
        // indexOf/lastIndexOf return -1 when nothing is found; guard both so
        // that substring() is never called with a negative index.
        final int wildcardIndex = filePath.indexOf("*");
        final String prefix = wildcardIndex < 0 ? filePath : filePath.substring(0, wildcardIndex);
        final int separatorIndex = prefix.lastIndexOf(File.separator);
        final String dirName = separatorIndex < 0 ? "." : prefix.substring(0, separatorIndex);
        final String fileItselfPattern = filePath.substring(separatorIndex + 1);
        final File dir = new File(dirName);
        if (!dir.isDirectory()) {
            continue;
        }
        // listFiles returns null if the directory cannot be read
        final File[] listing = dir.listFiles((FileFilter) new WildcardFileFilter(fileItselfPattern));
        if (listing == null) {
            continue;
        }
        // sorted so that the error message is deterministic
        final Set<File> matchedFiles = new TreeSet<>(Arrays.asList(listing));
        if (!matchedFiles.isEmpty()) {
            throw new EnforcerRuleException("Files found! " + Arrays.toString(matchedFiles.toArray()));
        }
    }
}
Also used : TreeSet(java.util.TreeSet) EnforcerRuleException(org.apache.maven.enforcer.rule.api.EnforcerRuleException) File(java.io.File) WildcardFileFilter(org.apache.commons.io.filefilter.WildcardFileFilter)

Example 15 with WildcardFileFilter

use of org.apache.commons.io.filefilter.WildcardFileFilter in project hadoop by apache.

the class TestLeveldbTimelineStore method testLevelDbRepair.

/**
 * Checks that a LevelDb repair is attempted once during the initialization
 * of a {@link LeveldbTimelineStore} when the first attempt to open the
 * database fails, and that files carrying the backup extension are present
 * in the database directory afterwards.
 */
@Test
public void testLevelDbRepair() throws IOException {
    LeveldbTimelineStore timelineStore = new LeveldbTimelineStore();
    // factory whose first open() fails and whose later calls hit the real code
    JniDBFactory flakyFactory = Mockito.mock(JniDBFactory.class);
    Mockito.when(flakyFactory.open(Mockito.any(File.class), Mockito.any(Options.class)))
        .thenThrow(new IOException())
        .thenCallRealMethod();
    timelineStore.setFactory(flakyFactory);
    // keep this database separate from the one used by the other tests
    String dbDirName = this.getClass().getSimpleName() + "-tmpDir1";
    File dbDir = new File("target", dbDirName).getAbsoluteFile();
    Configuration repairConf = new Configuration(this.config);
    repairConf.set(YarnConfiguration.TIMELINE_SERVICE_LEVELDB_PATH, dbDir.getAbsolutePath());
    try {
        timelineStore.init(repairConf);
        Mockito.verify(flakyFactory, Mockito.times(1))
            .repair(Mockito.any(File.class), Mockito.any(Options.class));
        FileFilter backupFilter = new WildcardFileFilter("*" + LeveldbTimelineStore.BACKUP_EXT + "*");
        File[] backups = dbDir.listFiles(backupFilter);
        Assert.assertTrue(backups.length > 0);
    } finally {
        timelineStore.close();
        fsContext.delete(new Path(dbDir.getAbsolutePath()), true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Options(org.iq80.leveldb.Options) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) JniDBFactory(org.fusesource.leveldbjni.JniDBFactory) IOException(java.io.IOException) FileFilter(java.io.FileFilter) WildcardFileFilter(org.apache.commons.io.filefilter.WildcardFileFilter) File(java.io.File) WildcardFileFilter(org.apache.commons.io.filefilter.WildcardFileFilter) Test(org.junit.Test)

Aggregations

WildcardFileFilter (org.apache.commons.io.filefilter.WildcardFileFilter)22 File (java.io.File)21 FileFilter (java.io.FileFilter)6 IOException (java.io.IOException)4 HashMap (java.util.HashMap)3 Activity (android.app.Activity)2 SystemEnvironment (com.thoughtworks.go.util.SystemEnvironment)2 AndFileFilter (org.apache.commons.io.filefilter.AndFileFilter)2 SharedPreferences (android.content.SharedPreferences)1 ConnectivityManager (android.net.ConnectivityManager)1 NetworkInfo (android.net.NetworkInfo)1 HttpResponse (ch.boye.httpclientandroidlib.HttpResponse)1 HttpGet (ch.boye.httpclientandroidlib.client.methods.HttpGet)1 ConnectTimeoutException (ch.boye.httpclientandroidlib.conn.ConnectTimeoutException)1 ButterflyModule (edu.mit.simile.butterfly.ButterflyModule)1 FileIteratingFirehose (io.druid.data.input.impl.FileIteratingFirehose)1 IAE (io.druid.java.util.common.IAE)1 ISE (io.druid.java.util.common.ISE)1 SocketTimeoutException (java.net.SocketTimeoutException)1 URI (java.net.URI)1