Search in sources :

Example 26 with IndexingConfig

Use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in the Apache Stanbol project.

Class LineBasedEntityIterator, method setConfiguration.

/**
 * Configures this iterator from the parsed configuration map.
 * <p>
 * The following keys are read: {@code IndexingConfig.KEY_INDEXING_CONFIG},
 * {@code PARAM_CHARSET}, {@code PARAM_URL_ENCODE_ENTITY_IDS},
 * {@code PARAM_URL_DECODE_ENTITY_IDS}, {@code PARAM_ENTITY_SCORE_FILE},
 * {@code PARAM_ID_POS}, {@code PARAM_SCORE_POS}, {@code PARAM_ID_NAMESPACE},
 * {@code PARAM_SEPARATOR}, {@code PARAM_TRIM_LINE}, {@code PARAM_TRIM_ID} and
 * {@code PARAM_NS_PREFIX_STATE}. For the boolean encode/decode/trim options a
 * key that is present without a value is interpreted as {@code true}.
 *
 * @param config the configuration; values are converted via {@code toString()}
 * @throws IllegalArgumentException if URL encoding and decoding of entity IDs
 *         are both enabled, if the ID or score position is not an integer, if
 *         both positions are equal, or if the score file needs to be resolved
 *         (no reader set) but the configuration contains no
 *         {@code IndexingConfig}
 * @throws IllegalStateException if namespace prefix support is enabled but no
 *         NamespacePrefixService is available
 */
@Override
public void setConfiguration(Map<String, Object> config) {
    log.info("Configure {} :", getClass().getSimpleName());
    IndexingConfig indexingConfig = (IndexingConfig) config.get(IndexingConfig.KEY_INDEXING_CONFIG);
    if (indexingConfig != null) {
        //will be null if used for post processing
        nsPrefixService = indexingConfig.getNamespacePrefixService();
    }
    Object value = config.get(PARAM_CHARSET);
    String charsetName = value == null ? null : value.toString();
    if (charsetName != null) {
        this.charset = charsetName;
        log.info("Set charset to '{}'", charset);
    }
    //parse encode/decode EntityIDs (a key without value is accepted as TRUE)
    value = config.get(PARAM_URL_ENCODE_ENTITY_IDS);
    boolean encodeIds;
    if (value != null) {
        encodeIds = Boolean.parseBoolean(value.toString());
    } else {
        encodeIds = config.containsKey(PARAM_URL_ENCODE_ENTITY_IDS);
    }
    value = config.get(PARAM_URL_DECODE_ENTITY_IDS);
    boolean decodeIds;
    if (value != null) {
        decodeIds = Boolean.parseBoolean(value.toString());
    } else {
        decodeIds = config.containsKey(PARAM_URL_DECODE_ENTITY_IDS);
    }
    if (encodeIds && decodeIds) {
        //String.format uses %s (not the SLF4J '{}') and both parameters are named
        throw new IllegalArgumentException(String.format("One can not enable both Parameters '%s' and '%s'!", PARAM_URL_ENCODE_ENTITY_IDS, PARAM_URL_DECODE_ENTITY_IDS));
    } else if (encodeIds) {
        this.encodeEntityIds = 1;
        log.info("activate URL encoding of Entity IDs");
    } else if (decodeIds) {
        this.encodeEntityIds = -1;
        log.info("activate URL decoding of Entity IDs");
    }
    value = config.get(PARAM_ENTITY_SCORE_FILE);
    if (reader == null) {
        //the source file can only be resolved via the IndexingConfig
        if (indexingConfig == null) {
            throw new IllegalArgumentException("Unable to resolve the entity score file because the parsed " + "configuration does not contain an IndexingConfig (key: " + IndexingConfig.KEY_INDEXING_CONFIG + ") and no reader was set!");
        }
        if (value == null || value.toString().isEmpty()) {
            scoreFile = indexingConfig.getSourceFile(DEFAULT_ENTITY_SCORE_FILE);
        } else {
            scoreFile = indexingConfig.getSourceFile(value.toString());
        }
        log.info("Set Source File to '{}'", this.scoreFile);
    }
    //else reader parsed in the constructor ... nothing todo
    //NOTE: opening the score file is now done in the initialise() method
    value = config.get(PARAM_ID_POS);
    if (value != null) {
        try {
            setIdPos(Integer.parseInt(value.toString()));
            log.info("Set Entity ID Position to '{}'", idPos);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("Unable to parse the position of the entity id from " + value, e);
        }
    }
    value = config.get(PARAM_SCORE_POS);
    if (value != null) {
        try {
            setScorePos(Integer.parseInt(value.toString()));
            log.info("Set Score Position to '{}'", scorePos);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("Unable to parse the position of the entity score from " + value, e);
        }
    }
    if (idPos == scorePos) {
        throw new IllegalArgumentException("The position of the ID and the Score " + "values MUST NOT be the same value " + idPos + "! Use " + PARAM_ID_POS + "(default=" + DEFAULT_ID_POS + ") and " + PARAM_SCORE_POS + "(default=" + DEFAULT_SCORE_POS + ") to configure " + "other values than the defaults.");
    }
    value = config.get(PARAM_ID_NAMESPACE);
    if (value != null) {
        //unescape so that Java escape sequences can be used in the configuration
        this.namespace = StringEscapeUtils.unescapeJava(value.toString());
        log.info("Set Namespace to '{}'", namespace);
    }
    value = config.get(PARAM_SEPARATOR);
    if (value != null && !value.toString().isEmpty()) {
        this.separator = value.toString();
        log.info("Set Separator to '{}'", separator);
    }
    value = config.get(PARAM_TRIM_LINE);
    if (value != null) {
        trimLine = Boolean.parseBoolean(value.toString());
        log.info("Set Trim Line State to '{}'", trimLine);
    } else if (config.containsKey(PARAM_TRIM_LINE)) {
        //also accept the key without value as TRUE
        trimLine = true;
        log.info("Set Trim Line State to '{}'", trimLine);
    }
    value = config.get(PARAM_TRIM_ID);
    if (value != null) {
        trimEntityId = Boolean.parseBoolean(value.toString());
        log.info("Set Entity ID State to '{}'", trimEntityId);
    } else if (config.containsKey(PARAM_TRIM_ID)) {
        //also accept the key without value as TRUE
        trimEntityId = true;
        log.info("Set Entity ID State to '{}'", trimEntityId);
    }
    //STANBOL-1015
    value = config.get(PARAM_NS_PREFIX_STATE);
    if (value instanceof Boolean) {
        nsPrefixState = ((Boolean) value).booleanValue();
    } else if (value != null) {
        nsPrefixState = Boolean.parseBoolean(value.toString());
    } else {
        //deactivate as default
        nsPrefixState = false;
    }
    if (nsPrefixState && nsPrefixService == null) {
        throw new IllegalStateException("Unable to enable Namespace Prefix support, " + "because no NamespacePrefixService is preset!");
    }
    log.info("Set Namespace Prefix State to {}", nsPrefixState);
}
Also used : IndexingConfig(org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig)

Aggregations

IndexingConfig (org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig)26 Test (org.junit.Test)15 File (java.io.File)8 EntityIterator (org.apache.stanbol.entityhub.indexing.core.EntityIterator)4 EntityScore (org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore)4 IOException (java.io.IOException)3 EntityDataIterable (org.apache.stanbol.entityhub.indexing.core.EntityDataIterable)3 EntityDataIterator (org.apache.stanbol.entityhub.indexing.core.EntityDataIterator)3 EntityDataProvider (org.apache.stanbol.entityhub.indexing.core.EntityDataProvider)3 ResourceLoader (org.apache.stanbol.entityhub.indexing.core.source.ResourceLoader)3 FileInputStream (java.io.FileInputStream)2 InputStream (java.io.InputStream)2 NamespacePrefixService (org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService)2 ScoreNormaliser (org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser)2 URI (java.net.URI)1 URISyntaxException (java.net.URISyntaxException)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 IndexerImpl (org.apache.stanbol.entityhub.indexing.core.impl.IndexerImpl)1 EntityIneratorToScoreProviderAdapter (org.apache.stanbol.entityhub.indexing.core.source.EntityIneratorToScoreProviderAdapter)1