use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in project stanbol by apache.
the class RdfIndexingSourceTest method testEntityDataProvider.
/**
 * Loads the "provider" test configuration, initialises the configured
 * {@link EntityIterator} and {@link EntityDataProvider} and checks that the
 * provider is an {@link RdfIndexingSource} whose data pass
 * {@code validateRepresentation} for every entity returned by the iterator.
 * Finally the number of processed entities is compared with
 * {@code NUMBER_OF_ENTITIES_EXPECTED}.
 */
@Test
public void testEntityDataProvider() {
    log.info(" --- testEntityDataProvider ---");
    String testName = "provider";
    IndexingConfig config = new IndexingConfig(CONFIG_ROOT + File.separatorChar + testName, CONFIG_ROOT + '/' + testName) {
    };
    EntityIterator entityIdIterator = config.getEntityIdIterator();
    assertNotNull("Unable to perform test without EntityIterator", entityIdIterator);
    if (entityIdIterator.needsInitialisation()) {
        entityIdIterator.initialise();
    }
    EntityDataProvider dataProvider = config.getEntityDataProvider();
    assertNotNull(dataProvider);
    // there are test data to load
    assertTrue(dataProvider.needsInitialisation());
    dataProvider.initialise();
    // JUnit convention: expected value first, actual value second
    assertEquals(RdfIndexingSource.class, dataProvider.getClass());
    long count = 0;
    while (entityIdIterator.hasNext()) {
        EntityScore entityScore = entityIdIterator.next();
        assertNotNull(entityScore);
        assertNotNull(entityScore.id);
        validateRepresentation(dataProvider.getEntityData(entityScore.id), entityScore.id);
        count++;
    }
    // check if all entities were found
    assertEquals(String.format("%s Entities expected but %s processed!", NUMBER_OF_ENTITIES_EXPECTED, count), NUMBER_OF_ENTITIES_EXPECTED, count);
}
use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in project stanbol by apache.
the class RdfIndexingSourceTest method testQuadsImport.
/**
 * Tests support for Quads (STANBOL-764).
 * <p>
 * Loads the "quads" test configuration, initialises the configured
 * {@link EntityIterator} and {@link EntityDataProvider} and checks that
 * exactly 9 entities are processed, each of them passing
 * {@code validateRepresentation}.
 */
@Test
public void testQuadsImport() {
    log.info(" --- testQuadsImport ---");
    String testName = "quads";
    // all 9 entities are expected in the default dataset (and not named graphs)
    final long expectedEntities = 9;
    IndexingConfig config = new IndexingConfig(CONFIG_ROOT + File.separatorChar + testName, CONFIG_ROOT + '/' + testName) {
    };
    EntityIterator entityIdIterator = config.getEntityIdIterator();
    assertNotNull("Unable to perform test without EntityIterator", entityIdIterator);
    if (entityIdIterator.needsInitialisation()) {
        entityIdIterator.initialise();
    }
    EntityDataProvider dataProvider = config.getEntityDataProvider();
    assertNotNull(dataProvider);
    // there are test data to load
    assertTrue(dataProvider.needsInitialisation());
    dataProvider.initialise();
    // JUnit convention: expected value first, actual value second
    assertEquals(RdfIndexingSource.class, dataProvider.getClass());
    long count = 0;
    while (entityIdIterator.hasNext()) {
        EntityScore entityScore = entityIdIterator.next();
        assertNotNull(entityScore);
        assertNotNull(entityScore.id);
        validateRepresentation(dataProvider.getEntityData(entityScore.id), entityScore.id);
        count++;
    }
    assertEquals(String.format("%s Entities expected but %s processed!", expectedEntities, count), expectedEntities, count);
}
use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in project stanbol by apache.
the class RdfIndexingSource method setConfiguration.
/**
 * Configures this indexing source from the parsed configuration.
 * <ol start="0">
 * <li>reads the base URI ({@code PARAM_BASE_URI}, falling back to
 * {@code DEFAULT_BASE_URI})</li>
 * <li>loads the Sesame repository configuration file, looks up the matching
 * {@link RepositoryFactory} and creates and initialises the repository</li>
 * <li>creates the {@link ResourceLoader}, sets its folder for imported files
 * and - unless importing is deactivated via {@code PARAM_IMPORT_SOURCE} -
 * registers the configured RDF source files/folders</li>
 * </ol>
 *
 * @param config the configuration; must provide the {@link IndexingConfig}
 * under the key {@link IndexingConfig#KEY_INDEXING_CONFIG}
 * @throws IllegalArgumentException if no {@link IndexingConfig} is present or
 * the repository configuration file does not exist
 * @throws IllegalStateException if no {@link RepositoryFactory} is available
 * for the configured implementation or the repository can not be initialised
 */
@Override
public void setConfiguration(Map<String, Object> config) {
    IndexingConfig indexingConfig = (IndexingConfig) config.get(IndexingConfig.KEY_INDEXING_CONFIG);
    if (indexingConfig == null) {
        // fail with a clear message instead of a NullPointerException further down
        throw new IllegalArgumentException("No IndexingConfig object present as value of key '" + IndexingConfig.KEY_INDEXING_CONFIG + "'!");
    }
    //(0) parse the baseUri
    Object value = config.get(PARAM_BASE_URI);
    baseUri = value == null ? DEFAULT_BASE_URI : value.toString();
    //(1) init the Sesame Repository from the RDF config
    value = config.get(PARAM_REPOSITORY_CONFIG);
    File repoConfigFile = indexingConfig.getConfigFile(value != null ? value.toString() : DEFAULT_REPOSITORY_CONFIG);
    if (repoConfigFile.isFile()) {
        //read the config (an RDF file)
        this.repoConfig = loadRepositoryConfig(repoConfigFile);
    } else {
        throw new IllegalArgumentException("The configured Sesame Repository configuration file " + repoConfigFile + " is missing. Please use the '" + PARAM_REPOSITORY_CONFIG + "' parameter to configure the actual configuration file (relative " + "to the config '" + indexingConfig.getConfigFolder() + "' folder)");
    }
    RepositoryFactory factory = RepositoryRegistry.getInstance().get(repoConfig.getRepositoryImplConfig().getType());
    // description of the repository shared by all error messages below
    final String repoInfo = "(id: " + repoConfig.getID() + ", title: " + repoConfig.getTitle() + ", impl: " + repoConfig.getRepositoryImplConfig().getType() + ")";
    if (factory == null) {
        throw new IllegalStateException("Unable to initialise Repository " + repoInfo + " because no " + "RepositoryFactory is present for the specified implementation!");
    }
    try {
        repository = factory.getRepository(repoConfig.getRepositoryImplConfig());
        sesameFactory = repository.getValueFactory();
        repository.initialize();
        //we created it, so we do shut it down
        shutdownRepository = true;
    } catch (RepositoryConfigException e) {
        throw new IllegalStateException("Unable to initialise Repository " + repoInfo + "!", e);
    } catch (RepositoryException e) {
        throw new IllegalStateException("Unable to initialise Repository " + repoInfo + "!", e);
    }
    //(2) init the resourceLoader
    loader = new ResourceLoader(new RdfResourceImporter(repository, baseUri), indexingConfig.isFailOnError());
    //set the folder for imported files
    value = config.get(PARAM_IMPORTED_FOLDER);
    String importedFolderName;
    if (value != null && !value.toString().isEmpty()) {
        importedFolderName = value.toString();
    } else {
        importedFolderName = DEFAULT_IMPORTED_FOLDER_NAME;
    }
    File importedFolder = new File(indexingConfig.getSourceFolder(), importedFolderName);
    log.info("Imported RDF File Folder: {}", importedFolder);
    this.loader.setImportedDir(importedFolder);
    //check if importing is deactivated (default is true)
    boolean importSource = true;
    value = config.get(PARAM_IMPORT_SOURCE);
    if (value != null) {
        importSource = Boolean.parseBoolean(value.toString());
    }
    if (importSource) {
        // if we need to import ... check the source config
        log.info("Importing RDF data from:");
        value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
        if (value == null) {
            //if not set use the default
            value = DEFAULT_SOURCE_FOLDER_NAME;
        }
        for (String configuredSource : value.toString().split(",")) {
            // tolerate whitespace around the separators ("a.nt, b.nt")
            String source = configuredSource.trim();
            File sourceFileOrDirectory = indexingConfig.getSourceFile(source);
            if (sourceFileOrDirectory.exists()) {
                //register the configured source with the ResourceLoader
                this.loader.addResource(sourceFileOrDirectory);
            } else if (FilenameUtils.getExtension(source).isEmpty()) {
                //no file extension: treat the source as a not yet existing
                //directory and try to create it for the RDF files.
                if (sourceFileOrDirectory.mkdirs()) {
                    //register the freshly created (still empty) directory so
                    //that all configured and existing sources are known to
                    //the ResourceLoader
                    this.loader.addResource(sourceFileOrDirectory);
                } else {
                    log.warn("Unable to create directory {} configured to import RDF data from. " + "You will need to create this directory manually before copying the " + "RDF files into it.", sourceFileOrDirectory);
                }
            } else {
                log.warn("Unable to find RDF source {} within the indexing Source folder {}", source, indexingConfig.getSourceFolder());
            }
        }
        if (log.isInfoEnabled()) {
            for (String registeredSource : loader.getResources(ResourceState.REGISTERED)) {
                log.info(" > {}", registeredSource);
            }
        }
    } else {
        // parameter name and value are passed as arguments so that both
        // placeholders are filled correctly
        log.info("Importing RDF data deactivated by parameter {}={}", PARAM_IMPORT_SOURCE, value);
    }
}
use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in project stanbol by apache.
the class Utils method getTDBDataset.
/**
 * Uses the parsed configuration to get/create the Jena TDB store.
 * <p>
 * The store location is resolved relative to the source folder of the
 * {@link IndexingConfig}: the value of {@code PARAM_MODEL_DIRECTORY} if
 * present, otherwise {@code DEFAULT_MODEL_DIRECTORY}.
 * @param config A configuration as parsed to {@link IndexingComponent#setConfiguration(Map)}
 * @return the opened/created Jena TDB dataset
 * @throws IllegalArgumentException if the config is <code>null</code>; is
 * missing a value for the {@link IndexingConfig#KEY_INDEXING_CONFIG} or
 * {@link #initTDBDataset(File)} throws an IllegalArgumentException
 */
public static DatasetGraphTDB getTDBDataset(Map<String, Object> config) {
    if (config == null) {
        // documented contract: null config is an IllegalArgumentException, not an NPE
        throw new IllegalArgumentException("The parsed configuration MUST NOT be NULL!");
    }
    IndexingConfig indexingConfig = (IndexingConfig) config.get(KEY_INDEXING_CONFIG);
    if (indexingConfig == null) {
        throw new IllegalArgumentException("No IndexingConfig object present as value of key '" + KEY_INDEXING_CONFIG + "'!");
    }
    Object value = config.get(PARAM_MODEL_DIRECTORY);
    String modelDirectory = value == null ? DEFAULT_MODEL_DIRECTORY : value.toString();
    File modelLocation = new File(indexingConfig.getSourceFolder(), modelDirectory);
    return initTDBDataset(modelLocation);
}
use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in project stanbol by apache.
the class SolrYardIndexingDestinationTest method testMissingBoostConfig.
/**
 * Builds an {@link IndexingConfig} for the "missingBoostConfig" test
 * configuration and requests its indexing destination. The test passes only
 * if an {@link IllegalArgumentException} is thrown while doing so.
 */
@Test(expected = IllegalArgumentException.class)
public void testMissingBoostConfig() {
    final String name = "missingBoostConfig";
    // the same location once with the platform separator and once with '/'
    final String platformPath = CONFIG_ROOT + File.separatorChar + name;
    final String slashPath = CONFIG_ROOT + '/' + name;
    // anonymous subclass, exactly as the other tests create their configs
    final IndexingConfig indexingConfig = new IndexingConfig(platformPath, slashPath) {
    };
    indexingConfig.getIndexingDestination();
}
Aggregations