Search in sources :

Example 11 with Representation

use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.

the class SolrYardIndexingDestinationTest method validateSolrDestination.

/**
 * Checks if the SolrYardIndexingDestination returned by the
 * {@link IndexingConfig} is valid and functional.
 * <p>
 * The check initialises the destination, stores a single test
 * {@link Representation} in the returned {@link Yard}, finalises the
 * destination and then inspects the created Solr index archive.
 * @param config the configuration
 * @throws YardException indicates problems while working with the {@link SolrYard}
 * returned by {@link IndexingDestination#getYard()}
 * @throws IOException indicates problems while validating the SolrArchives
 * created by the {@link IndexingDestination#finalise()} method
 */
private void validateSolrDestination(IndexingConfig config) throws YardException, IOException {
    //get the destination
    IndexingDestination destination = config.getIndexingDestination();
    assertNotNull(destination);
    //JUnit expects (expected, actual); this order keeps failure messages correct
    assertEquals(SolrYardIndexingDestination.class, destination.getClass());
    //initialise
    assertTrue(destination.needsInitialisation());
    destination.initialise();
    //test that the returned Yard instance is functional
    Yard yard = destination.getYard();
    assertNotNull(yard);
    assertEquals(SolrYard.class, yard.getClass());
    Representation rep = yard.getValueFactory().createRepresentation("http://www.example.com/entity#123");
    rep.add(NamespaceEnum.rdfs + "label", "test");
    rep.add(NamespaceEnum.rdfs + "description", "Representation to test storage while indexing");
    rep.add(RdfResourceEnum.entityRank.getUri(), Float.valueOf(0.8f));
    yard.store(rep);
    //finalise
    destination.finalise();
    //test the archives
    File expectedSolrArchiveFile = new File(config.getDistributionFolder(), config.getName() + ".solrindex.zip");
    assertTrue(expectedSolrArchiveFile.isFile());
    // validate the archive
    //work on a copy so that found names can be removed; whatever is left is missing
    Set<String> expected = new HashSet<String>(EXPECTED_INDEX_ARCHIVE_FILE_NAMES);
    ZipFile archive = new ZipFile(expectedSolrArchiveFile);
    try {
        for (Enumeration<? extends ZipEntry> entries = archive.entries(); entries.hasMoreElements(); ) {
            ZipEntry entry = entries.nextElement();
            //the name of the index MUST be the root folder within the Archive!
            assertTrue(entry.getName().startsWith(config.getName()));
            String name = FilenameUtils.getName(entry.getName());
            if (expected.remove(name)) {
                log.info("found expected Entry '{}'", entry.getName());
            }
            Assert.assertFalse("found unexpected Entry '" + entry.getName() + "' in " + "SolrIndexArchive", UNEXPECTED_INDEX_ARCHIVE_FILE_NAMES.contains(name));
        }
    } finally {
        //always release the file handle, also if one of the assertions above fails
        archive.close();
    }
    assertTrue("missing Files in index archive: " + expected, expected.isEmpty());
//TODO: reimplement to validate the created bundle!
//        //check for the solrArchive reference file and validate required properties
//        File expectedSolrArchiveReferenceFile = 
//            new File(,config.getName()+".solrindex.ref");
//        assertTrue(expectedSolrArchiveReferenceFile.isFile());
//        Properties solrRefProperties = new Properties();
//        solrRefProperties.load(new FileInputStream(expectedSolrArchiveReferenceFile));
//        assertTrue(solrRefProperties.getProperty("Index-Archive").equals(expectedSolrArchiveFile.getName()));
//        assertTrue(solrRefProperties.getProperty("Name") != null);
}
Also used : SolrYard(org.apache.stanbol.entityhub.yard.solr.impl.SolrYard) Yard(org.apache.stanbol.entityhub.servicesapi.yard.Yard) ZipFile(java.util.zip.ZipFile) ZipEntry(java.util.zip.ZipEntry) IndexingDestination(org.apache.stanbol.entityhub.indexing.core.IndexingDestination) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) ZipFile(java.util.zip.ZipFile) File(java.io.File) HashSet(java.util.HashSet)

Example 12 with Representation

use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.

the class DummyEntityIdSource method next.

/**
 * Returns the id of the next entity together with its score. The score is
 * read from the {@code RdfResourceEnum.entityRank} field of the next
 * {@link Representation}; <code>0</code> is used if no such value is present.
 */
@Override
public EntityScore next() {
    final Representation entity = entiyIterator.next();
    final Number rank = entity.getFirst(RdfResourceEnum.entityRank.getUri(), Number.class);
    final float value;
    if (rank != null) {
        value = rank.floatValue();
    } else {
        //no rank stored for this entity
        value = 0;
    }
    return new EntityScore(entity.getId(), value);
}
Also used : Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation)

Example 13 with Representation

use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.

the class FieldValueFilterTest method testExcludeConfig.

/**
 * Exercises {@link FieldValueFilter} instances configured with exclusions
 * (values prefixed by '!'): first on "rdf:type" with foaf:Person excluded,
 * then with several notations for the empty value on a field that is
 * configured as "skos:releated".
 */
@Test
public void testExcludeConfig() {
    EntityProcessor typeFilter = new FieldValueFilter(nsPrefixProvider, "rdf:type", "*;!foaf:Person");

    //only the excluded type -> filtered
    Representation person = getRepresentation(NamespaceEnum.foaf + "Person");
    Assert.assertNull(typeFilter.process(person));

    //an other type -> accepted
    Representation concept = getRepresentation(NamespaceEnum.skos + "Concept");
    Assert.assertNotNull(typeFilter.process(concept));

    //the excluded type combined with an other one -> accepted
    Representation conceptAndPerson = getRepresentation(NamespaceEnum.skos + "Concept", NamespaceEnum.foaf + "Person");
    Assert.assertNotNull(typeFilter.process(conceptAndPerson));

    //test empty value: all those configurations are expected to filter the entity
    String[] emptyValueConfigs = { "*;!null", "*;!", "*;!;!foaf:Person" };
    for (String emptyValueConfig : emptyValueConfigs) {
        EntityProcessor emptyValueFilter = new FieldValueFilter(nsPrefixProvider, "skos:releated", emptyValueConfig);
        Assert.assertNull(emptyValueFilter.process(conceptAndPerson));
    }
}
Also used : Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) EntityProcessor(org.apache.stanbol.entityhub.indexing.core.EntityProcessor) Test(org.junit.Test)

Example 14 with Representation

use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.

the class EntityDataBasedIndexingDaemon method run.

/**
 * Iterates over all entities of the data iterable, determines (and
 * optionally normalises) the score of each one and hands entities that
 * should be indexed to {@code produce(..)}. Calls {@code setFinished()}
 * once the iteration has completed.
 */
@Override
public void run() {
    log.info("...start iterating over Entity data");
    EntityDataIterator entities = dataIterable.entityDataIterator();
    while (entities.hasNext()) {
        Long started = Long.valueOf(System.currentTimeMillis());
        String entityId = entities.next();
        Representation data = null;
        Float score;
        if (scoreProvider.needsData()) {
            //the provider works on the data, so they need to be loaded right away
            data = entities.getRepresentation();
            score = scoreProvider.process(data);
        } else {
            score = scoreProvider.process(entityId);
        }
        //normalise the score
        if (normaliser != null) {
            score = normaliser.normalise(score);
        }
        //index if all entities are indexed anyway, no score is available or score >= 0
        boolean accepted = indexAllEntitiesState
                || score == null
                || score.compareTo(ScoreNormaliser.ZERO) >= 0;
        if (!accepted) {
            //ignore this entity
            continue;
        }
        if (data == null) {
            //data were not needed for scoring -> load them now
            data = entities.getRepresentation();
        }
        produce(data, score, started);
    }
    setFinished();
}
Also used : Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) EntityDataIterator(org.apache.stanbol.entityhub.indexing.core.EntityDataIterator)

Example 15 with Representation

use of org.apache.stanbol.entityhub.servicesapi.model.Representation in project stanbol by apache.

the class EntityhubImpl method loadEntity.

/**
 * Loads the Entity for the parsed representation. The parsed
 * {@link Representation} may be either the data or the metadata of an
 * Entity. If metadata are parsed, the id of the returned Entity will not
 * be the same as the id of the parsed {@link Representation}.
 * @param rep the representation or metadata of an entity
 * @return the created Entity including both data and metadata or
 * <code>null</code> if <code>null</code> was parsed or if no data could be
 * found for the Entity (this may be the case if an other thread has
 * deleted that Entity in the meantime)
 * @throws YardException On any error with the parsed Yard.
 */
private Entity loadEntity(Representation rep) throws YardException {
    if (rep == null) {
        return null;
    }
    final Representation data;
    Representation metadata;
    String entityId = ModelUtils.getAboutRepresentation(rep);
    if (entityId == null) {
        //the parsed representation is treated as the data of the Entity
        data = rep;
        metadata = null;
        //needed for logs
        entityId = rep.getId();
    } else {
        //the parsed representation refers to an Entity -> it is treated as metadata
        data = entityhubYard.getRepresentation(entityId);
        metadata = rep;
    }
    if (data == null) {
        log.warn("Unable find representation for Entity {} (metadata: {}", entityId, metadata);
        return null;
    }
    //NOTE(review): the metadata are always looked up by the id of the parsed
    // representation, also if the parsed representation was already treated
    // as metadata above - confirm that this is intended
    metadata = lookupMetadata(rep.getId(), true);
    return new EntityImpl(config.getID(), data, metadata);
}
Also used : EntityImpl(org.apache.stanbol.entityhub.core.model.EntityImpl) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation)

Aggregations

Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)198 Test (org.junit.Test)117 Text (org.apache.stanbol.entityhub.servicesapi.model.Text)32 HashSet (java.util.HashSet)31 Yard (org.apache.stanbol.entityhub.servicesapi.yard.Yard)25 Entity (org.apache.stanbol.entityhub.servicesapi.model.Entity)16 YardException (org.apache.stanbol.entityhub.servicesapi.yard.YardException)15 ValueFactory (org.apache.stanbol.entityhub.servicesapi.model.ValueFactory)14 Reference (org.apache.stanbol.entityhub.servicesapi.model.Reference)12 FieldQuery (org.apache.stanbol.entityhub.servicesapi.query.FieldQuery)12 ArrayList (java.util.ArrayList)11 RdfRepresentation (org.apache.stanbol.entityhub.model.sesame.RdfRepresentation)10 IOException (java.io.IOException)9 IRI (org.apache.clerezza.commons.rdf.IRI)9 ResponseBuilder (javax.ws.rs.core.Response.ResponseBuilder)8 Graph (org.apache.clerezza.commons.rdf.Graph)8 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)8 RdfRepresentation (org.apache.stanbol.entityhub.model.clerezza.RdfRepresentation)8 RdfValueFactory (org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory)8 EntityhubException (org.apache.stanbol.entityhub.servicesapi.EntityhubException)8