Search in sources :

Example 21 with Literal

use of org.openrdf.model.Literal in project stanbol by apache.

the class RdfRepresentationTest method testPlainLiteralToTextConversion.

/*--------------------------------------------------------------------------
     * Additional tests for special features of the Clerezza based implementation
     * 
     * These mainly cover support for additional types such as PlainLiteral,
     * TypedLiteral and UriRef: the conversion to such types as well as the
     * getters for such types.
     *--------------------------------------------------------------------------
     */
/**
     * {@link PlainLiteral} is used for natural language text in the Clerezza
     * RDF API. This tests if adding {@link PlainLiteral}s to the
     * {@link Representation#add(String, Object)} method makes them available
     * as {@link Text} instances via the {@link Representation} API (e.g. 
     * {@link Representation#get(String, String...)}).
     */
@Test
public void testPlainLiteralToTextConversion() {
    String field = "urn:test.RdfRepresentation:test.field";
    Literal noLangLiteral = valueFactory.getSesameFactory().createLiteral("A plain literal without Language");
    Literal enLiteral = valueFactory.getSesameFactory().createLiteral("An english literal", "en");
    Literal deLiteral = valueFactory.getSesameFactory().createLiteral("Ein Deutsches Literal", "de");
    Literal deATLiteral = valueFactory.getSesameFactory().createLiteral("Ein Topfen Verband hilft bei Zerrungen", "de-AT");
    Collection<Literal> plainLiterals = Arrays.asList(noLangLiteral, enLiteral, deLiteral, deATLiteral);
    Representation rep = createRepresentation(null);
    rep.add(field, plainLiterals);
    //now test, that the Plain Literals are available as natural language
    //tests via the Representation Interface!
    //1) one without a language
    Iterator<Text> noLangaugeTexts = rep.get(field, (String) null);
    assertTrue(noLangaugeTexts.hasNext());
    Text noLanguageText = noLangaugeTexts.next();
    assertEquals(noLangLiteral.getLabel(), noLanguageText.getText());
    assertNull(noLanguageText.getLanguage());
    //only a single result
    assertFalse(noLangaugeTexts.hasNext());
    //2) one with a language
    Iterator<Text> enLangaugeTexts = rep.get(field, "en");
    assertTrue(enLangaugeTexts.hasNext());
    Text enLangageText = enLangaugeTexts.next();
    assertEquals(enLiteral.getLabel(), enLangageText.getText());
    assertEquals(enLiteral.getLanguage(), enLangageText.getLanguage());
    //only a single result
    assertFalse(enLangaugeTexts.hasNext());
    //3) test to get all natural language values
    Set<String> stringValues = new HashSet<String>();
    for (Literal plainLiteral : plainLiterals) {
        stringValues.add(plainLiteral.getLabel());
    }
    Iterator<Text> texts = rep.getText(field);
    while (texts.hasNext()) {
        assertTrue(stringValues.remove(texts.next().getText()));
    }
    assertTrue(stringValues.isEmpty());
}
Also used : Literal(org.openrdf.model.Literal) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) HashSet(java.util.HashSet) RepresentationTest(org.apache.stanbol.entityhub.test.model.RepresentationTest) Test(org.junit.Test)

Example 22 with Literal

use of org.openrdf.model.Literal in project stanbol by apache.

the class RdfRepresentation method addNaturalText.

@Override
public void addNaturalText(String field, String text, String... languages) throws IllegalArgumentException {
    if (field == null) {
        throw new IllegalArgumentException("The parsed field MUST NOT be NULL");
    } else if (field.isEmpty()) {
        throw new IllegalArgumentException("The parsed field MUST NOT be Empty");
    }
    if (text == null) {
        throw new IllegalArgumentException("NULL values are not supported by Representations");
    }
    URI property = sesameFactory.createURI(field);
    if (languages == null || languages.length == 0) {
        languages = new String[] { null };
    }
    for (String language : languages) {
        Literal value = sesameFactory.createLiteral(text, language);
        addValue(property, value);
    }
}
Also used : Literal(org.openrdf.model.Literal) URI(org.openrdf.model.URI)

Example 23 with Literal

use of org.openrdf.model.Literal in project Anserini by castorini.

the class TrainingDataGenerator method birthdate.

/**
   * Generate training data for property birth date
   * <p>
   * Note: this function might need some refactoring when we add more properties
   */
void birthdate() throws ParseException, IOException {
    QueryParser queryParser = new QueryParser(FIELD_BIRTHDATE, getKbIndexAnalyzer());
    queryParser.setAllowLeadingWildcard(true);
    Query q = queryParser.parse("*");
    LOG.info("Starting the search using query: {}", q.toString());
    // Collect all matching documents in a set of matching doc ids
    Set<Integer> matchingDocIds = new HashSet<>();
    getKbIndexSearcher().search(q, new CheckHits.SetCollector(matchingDocIds));
    LOG.info("Found {} matching documents, retrieving...", matchingDocIds.size());
    // Process the retrieved document ids
    matchingDocIds.forEach((Integer docId) -> {
        Document doc = null;
        try {
            doc = getKbIndexReader().document(docId);
        } catch (IOException e) {
            LOG.warn("Error retrieving document with id: {}. Ignoring.", docId);
            return;
        }
        String freebaseURI = doc.get(ObjectTriplesLuceneDocumentGenerator.FIELD_SUBJECT);
        // We might have multiple values for the field
        String[] birthdates = doc.getValues(FIELD_BIRTHDATE);
        // Get the freebase English label of this entity
        String[] labels = doc.getValues(FIELD_LABEL);
        String englishLabel = null;
        for (String label : labels) {
            Literal literal = NTriplesUtil.parseLiteral(label, valueFactory);
            if (literal.getLanguage().orElse("N/A").toLowerCase().equals("en")) {
                englishLabel = literal.stringValue();
                break;
            }
        }
        // Basically make sure label is not null, for some entities in freebase
        if (englishLabel == null || freebaseURI == null || birthdates == null || birthdates.length == 0)
            // Ignore this search
            return;
        String freebaseId = freebaseUriToFreebaseId(freebaseURI);
        for (String birthdate : birthdates) {
            // Get string value
            String birthdateVal = extractValueFromTypedLiteralString(birthdate);
            // Write property value as training data
            writeToTrainingFile(TRAINING_DATA_OUTPUT_FILE_EXAMPLES, freebaseId, englishLabel, birthdateVal);
        }
    // TODO - After building an index for the mentions of Freebase entities in ClueWeb,
    // we need to get the ClueWeb mentions of this freebase entity and write them to a separate file
    });
}
Also used : QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Query(org.apache.lucene.search.Query) CheckHits(org.apache.lucene.search.CheckHits) Literal(org.openrdf.model.Literal) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) HashSet(java.util.HashSet)

Aggregations

Literal (org.openrdf.model.Literal)23 URI (org.openrdf.model.URI)12 Test (org.junit.Test)10 Statement (org.openrdf.model.Statement)5 NotifyingSailConnection (org.openrdf.sail.NotifyingSailConnection)5 SailConnection (org.openrdf.sail.SailConnection)5 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)3 RepresentationTest (org.apache.stanbol.entityhub.test.model.RepresentationTest)3 ValueFactory (org.openrdf.model.ValueFactory)3 BindingSet (org.openrdf.query.BindingSet)3 Edge (com.tinkerpop.blueprints.Edge)2 Vertex (com.tinkerpop.blueprints.Vertex)2 Collection (java.util.Collection)2 Date (java.util.Date)2 HashSet (java.util.HashSet)2 XMLGregorianCalendar (javax.xml.datatype.XMLGregorianCalendar)2 Text (org.apache.stanbol.entityhub.servicesapi.model.Text)2 Resource (org.openrdf.model.Resource)2 Value (org.openrdf.model.Value)2 LiteralImpl (org.openrdf.model.impl.LiteralImpl)2