Search in sources :

Example 16 with IndexingConfig

use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in project stanbol by apache.

the class RdfIndexingSourceTest method testEntityDataProvider.

/**
 * Loads the "provider" test configuration and checks that the configured
 * {@link EntityDataProvider} is an {@link RdfIndexingSource} that returns
 * valid entity data for every entity returned by the configured
 * {@link EntityIterator}.
 */
@Test
public void testEntityDataProvider() {
    log.info(" --- testEntityDataProvider ---");
    String testName = "provider";
    // anonymous subclass is used to instantiate the configuration for this test
    IndexingConfig config = new IndexingConfig(CONFIG_ROOT + File.separatorChar + testName, CONFIG_ROOT + '/' + testName) {
    };
    EntityIterator entityIdIterator = config.getEntityIdIterator();
    assertNotNull("Unable to perform test whithout EntityIterator", entityIdIterator);
    if (entityIdIterator.needsInitialisation()) {
        entityIdIterator.initialise();
    }
    EntityDataProvider dataProvider = config.getEntityDataProvider();
    assertNotNull(dataProvider);
    //there are test data to load
    assertTrue(dataProvider.needsInitialisation());
    dataProvider.initialise();
    // JUnit expects (expected, actual); the swapped order would produce a
    // misleading failure message
    assertEquals(RdfIndexingSource.class, dataProvider.getClass());
    long count = 0;
    while (entityIdIterator.hasNext()) {
        EntityScore entityScore = entityIdIterator.next();
        assertNotNull(entityScore);
        assertNotNull(entityScore.id);
        // the data provider must return valid data for every iterated entity id
        validateRepresentation(dataProvider.getEntityData(entityScore.id), entityScore.id);
        count++;
    }
    //check if all entities where found
    assertEquals(String.format("%s Entities expected but %s processed!", NUMBER_OF_ENTITIES_EXPECTED, count), NUMBER_OF_ENTITIES_EXPECTED, count);
}
Also used : EntityDataProvider(org.apache.stanbol.entityhub.indexing.core.EntityDataProvider) EntityScore(org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore) IndexingConfig(org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig) EntityIterator(org.apache.stanbol.entityhub.indexing.core.EntityIterator) Test(org.junit.Test)

Example 17 with IndexingConfig

use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in project stanbol by apache.

the class RdfIndexingSourceTest method testQuadsImport.

/**
 * Tests support for Quads (STANBOL-764).
 * <p>
 * Loads the "quads" test configuration, iterates over all entities returned
 * by the configured {@link EntityIterator}, validates the data returned by
 * the {@link RdfIndexingSource} for each of them and finally checks that
 * exactly 9 entities were processed.
 */
@Test
public void testQuadsImport() {
    log.info(" --- testQuadsImport ---");
    String testName = "quads";
    // anonymous subclass is used to instantiate the configuration for this test
    IndexingConfig config = new IndexingConfig(CONFIG_ROOT + File.separatorChar + testName, CONFIG_ROOT + '/' + testName) {
    };
    EntityIterator entityIdIterator = config.getEntityIdIterator();
    assertNotNull("Unable to perform test whithout EntityIterator", entityIdIterator);
    if (entityIdIterator.needsInitialisation()) {
        entityIdIterator.initialise();
    }
    EntityDataProvider dataProvider = config.getEntityDataProvider();
    assertNotNull(dataProvider);
    //there are test data to load
    assertTrue(dataProvider.needsInitialisation());
    dataProvider.initialise();
    // JUnit expects (expected, actual); the swapped order would produce a
    // misleading failure message
    assertEquals(RdfIndexingSource.class, dataProvider.getClass());
    long count = 0;
    while (entityIdIterator.hasNext()) {
        EntityScore entityScore = entityIdIterator.next();
        assertNotNull(entityScore);
        assertNotNull(entityScore.id);
        validateRepresentation(dataProvider.getEntityData(entityScore.id), entityScore.id);
        count++;
    }
    //check if all 9 entities where imported to the default dataset
    // (and not named graphs)
    assertEquals(String.format("%s Entities expected but %s processed!", 9, count), 9, count);
}
Also used : EntityDataProvider(org.apache.stanbol.entityhub.indexing.core.EntityDataProvider) EntityScore(org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore) IndexingConfig(org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig) EntityIterator(org.apache.stanbol.entityhub.indexing.core.EntityIterator) Test(org.junit.Test)

Example 18 with IndexingConfig

use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in project stanbol by apache.

the class RdfIndexingSource method setConfiguration.

/**
 * Configures this indexing source based on the parsed configuration.
 * <ol start="0">
 * <li> reads the base URI ({@code PARAM_BASE_URI}, falling back to
 *      {@code DEFAULT_BASE_URI})
 * <li> loads the Sesame repository configuration file and creates and
 *      initialises the repository described by it
 * <li> creates the {@link ResourceLoader}, sets the folder for imported
 *      files and - unless importing is deactivated via
 *      {@code PARAM_IMPORT_SOURCE} - registers the configured RDF sources
 * </ol>
 *
 * @param config the configuration. MUST contain the {@link IndexingConfig}
 * as value of the key {@link IndexingConfig#KEY_INDEXING_CONFIG}
 * @throws IllegalArgumentException if the {@link IndexingConfig} is missing
 * or the configured repository configuration file does not exist
 * @throws IllegalStateException if no RepositoryFactory is available for the
 * configured repository implementation or the repository can not be
 * initialised
 */
@Override
public void setConfiguration(Map<String, Object> config) {
    IndexingConfig indexingConfig = (IndexingConfig) config.get(IndexingConfig.KEY_INDEXING_CONFIG);
    if (indexingConfig == null) {
        // fail with a meaningful message instead of a NullPointerException below
        throw new IllegalArgumentException("No IndexingConfig object present as value of key '" + IndexingConfig.KEY_INDEXING_CONFIG + "'!");
    }
    //(0) parse the baseUri
    Object value = config.get(PARAM_BASE_URI);
    baseUri = value == null ? DEFAULT_BASE_URI : value.toString();
    //(1) init the Sesame Repository from the RDF config
    value = config.get(PARAM_REPOSITORY_CONFIG);
    File repoConfigFile = indexingConfig.getConfigFile(value != null ? value.toString() : DEFAULT_REPOSITORY_CONFIG);
    if (repoConfigFile.isFile()) {
        //read the config (an RDF file)
        this.repoConfig = loadRepositoryConfig(repoConfigFile);
    } else {
        throw new IllegalArgumentException("The configured Sesame Repository configuration file " + repoConfigFile + " is missing. Please use the '" + PARAM_REPOSITORY_CONFIG + "' paramteter to configure the actual configuration file (relative " + "to the config '" + indexingConfig.getConfigFolder() + "'folder)");
    }
    // lookup the factory registered for the configured repository implementation type
    RepositoryFactory factory = RepositoryRegistry.getInstance().get(repoConfig.getRepositoryImplConfig().getType());
    if (factory == null) {
        throw new IllegalStateException("Unable to initialise Repository (id: " + repoConfig.getID() + ", title: " + repoConfig.getTitle() + ", impl: " + repoConfig.getRepositoryImplConfig().getType() + ") because no " + "RepositoryFactory is present for the specified implementation!");
    }
    try {
        repository = factory.getRepository(repoConfig.getRepositoryImplConfig());
        sesameFactory = repository.getValueFactory();
        repository.initialize();
        //we created it, so we do shut it down
        shutdownRepository = true;
    } catch (RepositoryConfigException e) {
        throw new IllegalStateException("Unable to initialise Repository (id: " + repoConfig.getID() + ", title: " + repoConfig.getTitle() + ", impl: " + repoConfig.getRepositoryImplConfig().getType() + ")!", e);
    } catch (RepositoryException e) {
        throw new IllegalStateException("Unable to initialise Repository (id: " + repoConfig.getID() + ", title: " + repoConfig.getTitle() + ", impl: " + repoConfig.getRepositoryImplConfig().getType() + ")!", e);
    }
    //(2) init the resourceLoader
    loader = new ResourceLoader(new RdfResourceImporter(repository, baseUri), indexingConfig.isFailOnError());
    value = config.get(PARAM_IMPORTED_FOLDER);
    //set the folder for imported files
    String importedFolderName;
    if (value != null && !value.toString().isEmpty()) {
        importedFolderName = value.toString();
    } else {
        importedFolderName = DEFAULT_IMPORTED_FOLDER_NAME;
    }
    File importedFolder = new File(indexingConfig.getSourceFolder(), importedFolderName);
    log.info("Imported RDF File Folder: {}", importedFolder);
    this.loader.setImportedDir(importedFolder);
    //check if importing is deactivated
    //default is true
    boolean importSource = true;
    value = config.get(PARAM_IMPORT_SOURCE);
    if (value != null) {
        importSource = Boolean.parseBoolean(value.toString());
    }
    if (importSource) {
        // if we need to import ... check the source config
        log.info("Importing RDF data from:");
        value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
        if (value == null) {
            //if not set use the default
            value = DEFAULT_SOURCE_FOLDER_NAME;
        }
        // multiple sources can be configured as comma separated list
        for (String source : value.toString().split(",")) {
            File sourceFileOrDirectory = indexingConfig.getSourceFile(source);
            if (sourceFileOrDirectory.exists()) {
                //register the configured source with the ResourceLoader
                this.loader.addResource(sourceFileOrDirectory);
            } else {
                if (FilenameUtils.getExtension(source).isEmpty()) {
                    // no file extension: treat the missing source as a
                    // directory and try to create it
                    // NOTE(review): the resource is only registered when the
                    // directory could NOT be created - this looks inverted
                    // but is kept as is; confirm the intended behaviour.
                    if (!sourceFileOrDirectory.mkdirs()) {
                        log.warn("Unable to create directory {} configured to improt RDF data from. " + "You will need to create this directory manually before copying the" + "RDF files into it.", sourceFileOrDirectory);
                        this.loader.addResource(sourceFileOrDirectory);
                    }
                } else {
                    log.warn("Unable to find RDF source {} within the indexing Source folder {}", source, indexingConfig.getSourceFolder());
                }
            }
        }
        if (log.isInfoEnabled()) {
            for (String registeredSource : loader.getResources(ResourceState.REGISTERED)) {
                log.info(" > " + registeredSource);
            }
        }
    } else {
        // 'value' still holds the configured value of PARAM_IMPORT_SOURCE.
        // The parameter name needs to be parsed as log argument (and not
        // concatenated to the message) so that both placeholders are filled.
        log.info("Importing RDF data deactivated by parameter {}={}", PARAM_IMPORT_SOURCE, value);
    }
}
Also used : ResourceLoader(org.apache.stanbol.entityhub.indexing.core.source.ResourceLoader) IndexingConfig(org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig) RepositoryException(org.openrdf.repository.RepositoryException) RepositoryFactory(org.openrdf.repository.config.RepositoryFactory) RepositoryConfigException(org.openrdf.repository.config.RepositoryConfigException) File(java.io.File)

Example 19 with IndexingConfig

use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in project stanbol by apache.

the class Utils method getTDBDataset.

/**
 * Uses the parsed configuration to get/create the Jena TDB store. The store
 * is located within the source folder of the {@link IndexingConfig} in the
 * directory configured by {@code PARAM_MODEL_DIRECTORY} (falling back to
 * {@code DEFAULT_MODEL_DIRECTORY} if not present).
 * @param config A configuration as parsed to {@link IndexingComponent#setConfiguration(Map)}
 * @return the opened/created Jena TDB dataset
 * @throws IllegalArgumentException if the config is <code>null</code>; is
 * missing a value for the {@link IndexingConfig#KEY_INDEXING_CONFIG} or
 * {@link #initTDBDataset(File)} throws an IllegalArgumentException
 */
public static DatasetGraphTDB getTDBDataset(Map<String, Object> config) {
    if (config == null) {
        // documented contract: a null config results in an IllegalArgumentException
        throw new IllegalArgumentException("The parsed configuration MUST NOT be NULL!");
    }
    IndexingConfig indexingConfig = (IndexingConfig) config.get(KEY_INDEXING_CONFIG);
    if (indexingConfig == null) {
        throw new IllegalArgumentException("No IndexingConfig object present as value of key '" + KEY_INDEXING_CONFIG + "'!");
    }
    Object value = config.get(PARAM_MODEL_DIRECTORY);
    // use the default directory name if none is configured
    String modelDirectory = value == null ? DEFAULT_MODEL_DIRECTORY : value.toString();
    File modelLocation = new File(indexingConfig.getSourceFolder(), modelDirectory);
    return initTDBDataset(modelLocation);
}
Also used : IndexingConfig(org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig) File(java.io.File)

Example 20 with IndexingConfig

use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in project stanbol by apache.

the class SolrYardIndexingDestinationTest method testMissingBoostConfig.

/**
 * Requesting the indexing destination of the "missingBoostConfig" test
 * configuration is expected to fail with an {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testMissingBoostConfig() {
    final String name = "missingBoostConfig";
    // first path uses the platform separator, second one always uses '/'
    final String separatorPath = CONFIG_ROOT + File.separatorChar + name;
    final String slashPath = CONFIG_ROOT + '/' + name;
    IndexingConfig indexingConfig = new IndexingConfig(separatorPath, slashPath) {
    };
    // expected to throw the IllegalArgumentException
    indexingConfig.getIndexingDestination();
}
Also used : IndexingConfig(org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig) Test(org.junit.Test)

Aggregations

IndexingConfig (org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig)26 Test (org.junit.Test)15 File (java.io.File)8 EntityIterator (org.apache.stanbol.entityhub.indexing.core.EntityIterator)4 EntityScore (org.apache.stanbol.entityhub.indexing.core.EntityIterator.EntityScore)4 IOException (java.io.IOException)3 EntityDataIterable (org.apache.stanbol.entityhub.indexing.core.EntityDataIterable)3 EntityDataIterator (org.apache.stanbol.entityhub.indexing.core.EntityDataIterator)3 EntityDataProvider (org.apache.stanbol.entityhub.indexing.core.EntityDataProvider)3 ResourceLoader (org.apache.stanbol.entityhub.indexing.core.source.ResourceLoader)3 FileInputStream (java.io.FileInputStream)2 InputStream (java.io.InputStream)2 NamespacePrefixService (org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService)2 ScoreNormaliser (org.apache.stanbol.entityhub.indexing.core.normaliser.ScoreNormaliser)2 URI (java.net.URI)1 URISyntaxException (java.net.URISyntaxException)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 IndexerImpl (org.apache.stanbol.entityhub.indexing.core.impl.IndexerImpl)1 EntityIneratorToScoreProviderAdapter (org.apache.stanbol.entityhub.indexing.core.source.EntityIneratorToScoreProviderAdapter)1