Use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in the Apache Stanbol project.
The class RdfIndexingSourceTest, method testEntityDataProvider:
/**
 * Verifies that the {@link EntityDataProvider} configured for the "provider"
 * test configuration is an {@link RdfIndexingSource} and that it can resolve
 * the data for every id returned by the configured {@link EntityIterator}.
 */
@Test
public void testEntityDataProvider() {
    log.info(" --- testEntityDataProvider ---");
    String testName = "provider";
    // anonymous subclass grants access to the protected IndexingConfig constructor
    IndexingConfig config = new IndexingConfig(
            CONFIG_ROOT + File.separatorChar + testName,
            CONFIG_ROOT + '/' + testName) {
    };
    EntityIterator entityIdIterator = config.getEntityIdIterator();
    assertNotNull("Unable to perform test without EntityIterator", entityIdIterator);
    if (entityIdIterator.needsInitialisation()) {
        entityIdIterator.initialise();
    }
    EntityDataProvider dataProvider = config.getEntityDataProvider();
    try {
        assertNotNull(dataProvider);
        if (dataProvider.needsInitialisation()) {
            dataProvider.initialise();
        }
        // JUnit convention: expected value first, actual value second
        assertEquals(RdfIndexingSource.class, dataProvider.getClass());
        long count = 0;
        while (entityIdIterator.hasNext()) {
            EntityScore entityScore = entityIdIterator.next();
            assertNotNull(entityScore);
            assertNotNull(entityScore.id);
            validateRepresentation(dataProvider.getEntityData(entityScore.id), entityScore.id);
            count++;
        }
        // check if all entities where found
        assertEquals(
                String.format("%s Entities expected but %s processed!",
                        NUMBER_OF_ENTITIES_EXPECTED, count),
                NUMBER_OF_ENTITIES_EXPECTED, count);
    } finally {
        // we need to ensure close is called as otherwise other tests might fail;
        // guard against an NPE if getEntityDataProvider() returned null
        if (dataProvider != null) {
            dataProvider.close();
        }
    }
}
Use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in the Apache Stanbol project.
The class VcardIndexingSource, method setConfiguration:
/**
 * Initialises this indexing source from the parsed indexing configuration.
 * <p>
 * Resets the vCard import folder below the destination directory, registers
 * the configured source files/folders with the {@link ResourceLoader},
 * determines the {@link Charset} used to read the vCard files and validates
 * the required {@code prefix} parameter (also deriving the type separator
 * char from the kind of prefix).
 *
 * @param config the indexing configuration; MUST contain the
 *        {@link IndexingConfig} under {@link IndexingConfig#KEY_INDEXING_CONFIG}
 *        and the required {@code prefix} parameter
 * @throws IllegalStateException if the import folder cannot be reset/created,
 *         the configured charset is unsupported or the prefix is missing
 */
@Override
public void setConfiguration(Map<String, Object> config) {
    // init fields
    IndexingConfig indexingConfig = (IndexingConfig) config.get(IndexingConfig.KEY_INDEXING_CONFIG);
    loader = new ResourceLoader(this, true, false);
    // vcard files are imported from a special folder in the destination dir.
    // this folder needs to be deleted/(re-)created first.
    vcardFileImportFolder = new File(indexingConfig.getDestinationFolder(), "vcard");
    if (vcardFileImportFolder.exists()) {
        if (vcardFileImportFolder.isDirectory()) {
            try {
                FileUtils.deleteDirectory(vcardFileImportFolder);
            } catch (IOException e) {
                throw new IllegalStateException("Unable to delete Folder "
                        + vcardFileImportFolder.getAbsolutePath()
                        + " containing the vCard files from a "
                        + "previous indexing! Please remove this folder manually.", e);
            }
        } else if (!vcardFileImportFolder.delete()) {
            throw new IllegalStateException("Unable to delete File "
                    + vcardFileImportFolder.getAbsolutePath()
                    + " containing the vCard data from a "
                    + "previous indexing! Please remove this File manually.");
        }
    }
    if (!vcardFileImportFolder.mkdirs()) {
        // mkdirs() failed, so the message reports a create (not delete) problem
        throw new IllegalStateException("Unable to create Folder "
                + vcardFileImportFolder.getAbsolutePath()
                + " needed to import the vCard files! Please create this folder manually.");
    }
    // load config
    Object value;
    log.debug("load vcard resources from :");
    value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
    if (value == null) {
        // if not set use the default
        value = DEFAULT_SOURCE_FOLDER_NAME;
    }
    for (String source : value.toString().split(",")) {
        File sourceFileOrDirectory = indexingConfig.getSourceFile(source);
        if (sourceFileOrDirectory.exists()) {
            // register the configured source with the ResourceLoader
            this.loader.addResource(sourceFileOrDirectory);
        } else {
            if (FilenameUtils.getExtension(source).isEmpty()) {
                // no file extension -> treat as (missing) directory and create it
                if (!sourceFileOrDirectory.mkdirs()) {
                    log.warn("Unable to create directory {} configured to import source data from. "
                            + "You will need to create this directory manually before copying the "
                            + "source files into it.", sourceFileOrDirectory);
                } else {
                    // this would not be necessary because the directory will
                    // be empty - however I like to be consistent and have
                    // all configured and existent files & dirs added to the
                    // resource loader
                    this.loader.addResource(sourceFileOrDirectory);
                }
            } else {
                log.warn("Unable to find vcard source {} within the indexing Source folder {}",
                        source, indexingConfig.getSourceFolder());
            }
        }
    }
    if (log.isDebugEnabled()) {
        for (String registeredSource : loader.getResources(ResourceState.REGISTERED)) {
            log.debug(" > " + registeredSource);
        }
    }
    // parse the encoding
    value = config.get(PARAM_CHARSET);
    if (value != null) {
        String encoding = value.toString();
        if (encoding.isEmpty()) {
            // use platform encoding if empty
            charset = Charset.defaultCharset();
        } else {
            try {
                charset = Charset.forName(encoding);
            } catch (RuntimeException e) {
                throw new IllegalStateException("The configured encoding '" + encoding
                        + "' is not supported by this Platform", e);
            }
        }
    } else {
        // use platform encoding if missing
        charset = Charset.defaultCharset();
    }
    // parse the prefix
    value = config.get(PARAM_PREFIX);
    if (value == null || value.toString().isEmpty()) {
        throw new IllegalStateException("The configuration is missing the required parameter 'prefix'!");
    } else {
        prefix = value.toString();
        // set the typeSeperatorChar based on the kind of parsed prefix
        if (prefix.endsWith("#")) {
            typeSeperatorChar = '.';
        } else if (prefix.endsWith("/")) {
            typeSeperatorChar = '/';
        } else if (prefix.endsWith(":")) {
            typeSeperatorChar = ':';
        } else if (prefix.startsWith("urn:")) {
            // maybe an urn without a tailing ':'
            prefix = prefix + ':';
            typeSeperatorChar = ':';
        } else if (prefix.indexOf("://") > 0) {
            // maybe an url without a tailing '/' or '#'
            prefix = prefix + '/';
        }
        // else ... no idea what kind of prefix ... use the default '/'
    }
}
Use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in the Apache Stanbol project.
The class RdfIndexingSource, method setConfiguration:
/**
 * Initialises this RDF indexing source from the parsed indexing configuration.
 * <p>
 * Opens the TDB dataset, instantiates any configured {@link RdfImportFilter}s
 * (combining multiple filters via {@code UnionImportFilter}), sets up the
 * {@link ResourceLoader} with the configured source and "imported" folders,
 * and parses the bnode support state/prefix (STANBOL-765).
 *
 * @param config the indexing configuration; MUST contain the
 *        {@link IndexingConfig} under {@link IndexingConfig#KEY_INDEXING_CONFIG}
 * @throws IllegalArgumentException if a configured import filter class cannot
 *         be loaded/instantiated or the bnode prefix is not a valid URI
 */
@Override
public void setConfiguration(Map<String, Object> config) {
    IndexingConfig indexingConfig = (IndexingConfig) config.get(IndexingConfig.KEY_INDEXING_CONFIG);
    // first init the RDF Model
    this.indexingDataset = Utils.getTDBDataset(config);
    // second we need to check if we need to import RDF files to the RDF model
    // look if we want to use an import filter
    Object value = config.get(PARAM_IMPORT_FILTER);
    if (value == null) {
        log.info("No RDF Import Filter configured");
        importFilter = null;
    } else {
        String[] filterNames = value.toString().split(",");
        List<RdfImportFilter> filters = new ArrayList<RdfImportFilter>();
        ClassLoader cl = indexingConfig.getClass().getClassLoader();
        for (String filterName : filterNames) {
            filterName = filterName.trim();
            try {
                Class<? extends RdfImportFilter> importFilterClass =
                        cl.loadClass(filterName).asSubclass(RdfImportFilter.class);
                RdfImportFilter filter = importFilterClass.newInstance();
                filter.setConfiguration(config);
                filters.add(filter);
                // log the filter just created - the importFilter field is
                // not yet assigned at this point
                log.info("Use RDF ImportFilter {} (type: {})", filter,
                        importFilterClass.getSimpleName());
            } catch (ClassNotFoundException e) {
                throw new IllegalArgumentException("Configured RdfImportFilter '"
                        + filterName + "' not found", e);
            } catch (InstantiationException e) {
                throw new IllegalArgumentException("Configured RdfImportFilter '"
                        + filterName + "' can not be instantiated", e);
            } catch (IllegalAccessException e) {
                throw new IllegalArgumentException("Configured RdfImportFilter '"
                        + filterName + "' can not be created", e);
            }
        }
        if (filters.isEmpty()) {
            this.importFilter = null;
        } else if (filters.size() == 1) {
            this.importFilter = filters.get(0);
        } else {
            this.importFilter = new UnionImportFilter(
                    filters.toArray(new RdfImportFilter[filters.size()]));
        }
    }
    boolean failOnError = indexingConfig.isFailOnError();
    // create the ResourceLoader
    this.loader = new ResourceLoader(
            new RdfResourceImporter(indexingDataset, importFilter), failOnError);
    value = config.get(PARAM_IMPORTED_FOLDER);
    String importedFolderName;
    if (value != null && !value.toString().isEmpty()) {
        importedFolderName = value.toString();
    } else {
        importedFolderName = DEFAULT_IMPORTED_FOLDER_NAME;
    }
    File importedFolder = new File(indexingConfig.getSourceFolder(), importedFolderName);
    log.info("Imported RDF File Folder: {}", importedFolder);
    this.loader.setImportedDir(importedFolder);
    // check if importing is deactivated
    // default is true
    boolean importSource = true;
    value = config.get(PARAM_IMPORT_SOURCE);
    if (value != null) {
        importSource = Boolean.parseBoolean(value.toString());
    }
    if (importSource) {
        // if we need to import ... check the source config
        log.info("Importing RDF data from:");
        value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
        if (value == null) {
            // if not set use the default
            value = DEFAULT_SOURCE_FOLDER_NAME;
        }
        for (String source : value.toString().split(",")) {
            File sourceFileOrDirectory = indexingConfig.getSourceFile(source);
            if (sourceFileOrDirectory.exists()) {
                // register the configured source with the ResourceLoader
                this.loader.addResource(sourceFileOrDirectory);
            } else {
                if (FilenameUtils.getExtension(source).isEmpty()) {
                    // no file extension -> treat as (missing) directory for
                    // RDF files and create it
                    if (!sourceFileOrDirectory.mkdirs()) {
                        log.warn("Unable to create directory {} configured to import RDF data from. "
                                + "You will need to create this directory manually before copying the "
                                + "RDF files into it.", sourceFileOrDirectory);
                    } else {
                        // this would not be necessary because the directory will
                        // be empty - however I like to be consistent and have
                        // all configured and existent files & dirs added to the
                        // resource loader
                        this.loader.addResource(sourceFileOrDirectory);
                    }
                } else {
                    log.warn("Unable to find RDF source {} within the indexing Source folder {}",
                            source, indexingConfig.getSourceFolder());
                }
            }
        }
        if (log.isInfoEnabled()) {
            for (String registeredSource : loader.getResources(ResourceState.REGISTERED)) {
                log.info(" > " + registeredSource);
            }
        }
    } else {
        // pass the parameter name as a log argument (was concatenated into
        // the format string, leaving the second placeholder unfilled)
        log.info("Importing RDF data deactivated by parameter {}={}",
                PARAM_IMPORT_SOURCE, value);
    }
    // STANBOL-765: parsed bnode-prefix from parsed configuration.
    value = config.get(PARAM_BNODE_STATE);
    final Boolean bnodeState;
    if (value != null) {
        bnodeState = value instanceof Boolean ? (Boolean) value
                : Boolean.parseBoolean(value.toString());
    } else if (config.containsKey(PARAM_BNODE_STATE)) {
        // support key without value
        bnodeState = true;
    } else {
        // undefined
        bnodeState = null;
    }
    if (bnodeState == null || bnodeState) {
        // null or enabled -> consider prefix
        value = config.get(PARAM_BNODE_PREFIX);
        if (value != null) {
            try {
                new URI(value.toString());
            } catch (URISyntaxException e) {
                // preserve the cause so the invalid character position is reported
                throw new IllegalArgumentException("The configured " + PARAM_BNODE_PREFIX
                        + "='" + value.toString() + "' MUST BE a valid URI!", e);
            }
            bnodePrefix = value.toString();
        } else if (bnodeState != null) {
            // use default prefix if bnodeState is true
            bnodePrefix = String.format("urn:bnode:%s:", indexingConfig.getName());
        }
        // else bnodeState == null and no custom prefix -> disable by default
    }
    if (bnodePrefix != null) {
        log.info("Indexing of Bnodes enabled (prefix: {})", bnodePrefix);
    } else {
        log.info("Indexing of Bnodes disabled");
    }
}
Use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in the Apache Stanbol project.
The class RdfIndexingSourceTest, method testBlankNodeSupport:
/**
 * Verifies that blank nodes in the imported RDF data are indexed: the "bnode"
 * test configuration is expected to yield three entities (France from
 * france.rdf plus two BlankNode entities from bnode.nt).
 */
@Test
public void testBlankNodeSupport() {
    log.info(" --- testBlankNodeSupport ---");
    String testName = "bnode";
    // anonymous subclass grants access to the protected IndexingConfig constructor
    IndexingConfig config = new IndexingConfig(
            CONFIG_ROOT + File.separatorChar + testName,
            CONFIG_ROOT + '/' + testName) {
    };
    EntityDataIterable iterable = config.getDataIterable();
    assertNotNull(iterable);
    // JUnit convention: expected value first, actual value second
    assertEquals(RdfIndexingSource.class, iterable.getClass());
    assertTrue(iterable.needsInitialisation());
    iterable.initialise();
    // ((RdfIndexingSource)iterable).debug();
    EntityDataIterator it = iterable.entityDataIterator();
    long count = 0;
    while (it.hasNext()) {
        String entity = it.next();
        log.info("validate Entity " + entity);
        assertNotNull(entity);
        validateRepresentation(it.getRepresentation(), entity);
        count++;
    }
    // check if all entities where indexed
    // Expected are 3 entities First France from france.rdf
    // and two from BlankNode Entities in bnode.nt
    assertEquals(String.format("> %s Entities expected but only %s processed!", 3, count), 3, count);
}
Use of org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig in the Apache Stanbol project.
The class RdfIndexingSourceTest, method testQuadsImport:
/**
 * Tests support for Quads (STANBOL-764): all 9 entities of the "quads" test
 * configuration are expected to be imported into the default dataset (and not
 * into named graphs).
 */
@Test
public void testQuadsImport() {
    log.info(" --- testQuadsImport ---");
    String testName = "quads";
    // anonymous subclass grants access to the protected IndexingConfig constructor
    IndexingConfig config = new IndexingConfig(
            CONFIG_ROOT + File.separatorChar + testName,
            CONFIG_ROOT + '/' + testName) {
    };
    EntityIterator entityIdIterator = config.getEntityIdIterator();
    assertNotNull("Unable to perform test without EntityIterator", entityIdIterator);
    if (entityIdIterator.needsInitialisation()) {
        entityIdIterator.initialise();
    }
    EntityDataProvider dataProvider = config.getEntityDataProvider();
    assertNotNull(dataProvider);
    try {
        // there are test data to load
        assertTrue(dataProvider.needsInitialisation());
        dataProvider.initialise();
        // JUnit convention: expected value first, actual value second
        assertEquals(RdfIndexingSource.class, dataProvider.getClass());
        long count = 0;
        while (entityIdIterator.hasNext()) {
            EntityScore entityScore = entityIdIterator.next();
            assertNotNull(entityScore);
            assertNotNull(entityScore.id);
            validateRepresentation(dataProvider.getEntityData(entityScore.id), entityScore.id);
            count++;
        }
        // check if all 9 entities where imported to the default dataset
        // (and not named graphs)
        assertEquals(String.format("%s Entities expected but %s processed!", 9, count), 9, count);
    } finally {
        // ensure close is called as otherwise other tests might fail
        // (consistent with testEntityDataProvider)
        dataProvider.close();
    }
}
Aggregations