Use of org.openrdf.model.Literal in project stanbol by apache.
The class RdfRepresentationTest, method testPlainLiteralToTextConversion.
/*--------------------------------------------------------------------------
 * Additional tests for special features of the Clerezza based implementation
 *
 * This mainly covers support for additional types such as PlainLiteral,
 * TypedLiteral and UriRef: the conversion to such types as well as the
 * getters for such types.
 *--------------------------------------------------------------------------
 */
/**
 * {@link PlainLiteral} is used for natural language text in the Clerezza
 * RDF API. This tests whether {@link PlainLiteral}s added via
 * {@link Representation#add(String, Object)} become available as
 * {@link Text} instances through the {@link Representation} API (e.g.
 * {@link Representation#get(String, String...)}).
 */
@Test
public void testPlainLiteralToTextConversion() {
    String field = "urn:test.RdfRepresentation:test.field";
    Literal noLangLiteral = valueFactory.getSesameFactory().createLiteral("A plain literal without Language");
    Literal enLiteral = valueFactory.getSesameFactory().createLiteral("An english literal", "en");
    Literal deLiteral = valueFactory.getSesameFactory().createLiteral("Ein Deutsches Literal", "de");
    Literal deATLiteral = valueFactory.getSesameFactory().createLiteral("Ein Topfen Verband hilft bei Zerrungen", "de-AT");
    Collection<Literal> plainLiterals = Arrays.asList(noLangLiteral, enLiteral, deLiteral, deATLiteral);
    Representation rep = createRepresentation(null);
    rep.add(field, plainLiterals);
    //now test that the plain literals are available as natural language
    //texts via the Representation interface
    //1) the one without a language
    Iterator<Text> noLangaugeTexts = rep.get(field, (String) null);
    assertTrue(noLangaugeTexts.hasNext());
    Text noLanguageText = noLangaugeTexts.next();
    assertEquals(noLangLiteral.getLabel(), noLanguageText.getText());
    assertNull(noLanguageText.getLanguage());
    //only a single result
    assertFalse(noLangaugeTexts.hasNext());
    //2) one with a language
    Iterator<Text> enLangaugeTexts = rep.get(field, "en");
    assertTrue(enLangaugeTexts.hasNext());
    Text enLangageText = enLangaugeTexts.next();
    assertEquals(enLiteral.getLabel(), enLangageText.getText());
    assertEquals(enLiteral.getLanguage(), enLangageText.getLanguage());
    //only a single result
    assertFalse(enLangaugeTexts.hasNext());
    //3) get all natural language values
    Set<String> stringValues = new HashSet<String>();
    for (Literal plainLiteral : plainLiterals) {
        stringValues.add(plainLiteral.getLabel());
    }
    Iterator<Text> texts = rep.getText(field);
    while (texts.hasNext()) {
        assertTrue(stringValues.remove(texts.next().getText()));
    }
    assertTrue(stringValues.isEmpty());
}
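For comparison with the Javadoc above, which refers to the Clerezza RDF API, the same natural-language values would be created as Clerezza PlainLiterals roughly as follows. This is only an illustrative sketch against the Clerezza 0.x rdf.core API (PlainLiteralImpl and Language) and is not part of the test, which builds the literals through the Sesame factory instead.

    // requires org.apache.clerezza.rdf.core.Language, org.apache.clerezza.rdf.core.PlainLiteral
    // and org.apache.clerezza.rdf.core.impl.PlainLiteralImpl
    PlainLiteral noLang = new PlainLiteralImpl("A plain literal without Language");
    PlainLiteral en = new PlainLiteralImpl("An english literal", new Language("en"));
    PlainLiteral de = new PlainLiteralImpl("Ein Deutsches Literal", new Language("de"));
    PlainLiteral deAT = new PlainLiteralImpl("Ein Topfen Verband hilft bei Zerrungen", new Language("de-AT"));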
Use of org.openrdf.model.Literal in project stanbol by apache.
The class RdfRepresentation, method addNaturalText.
@Override
public void addNaturalText(String field, String text, String... languages) throws IllegalArgumentException {
    if (field == null) {
        throw new IllegalArgumentException("The parsed field MUST NOT be NULL");
    } else if (field.isEmpty()) {
        throw new IllegalArgumentException("The parsed field MUST NOT be Empty");
    }
    if (text == null) {
        throw new IllegalArgumentException("NULL values are not supported by Representations");
    }
    URI property = sesameFactory.createURI(field);
    if (languages == null || languages.length == 0) {
        languages = new String[] { null };
    }
    for (String language : languages) {
        Literal value = sesameFactory.createLiteral(text, language);
        addValue(property, value);
    }
}
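A usage sketch for this method (not from the Stanbol sources; the field URI and label values are made up, and it assumes a Representation instance obtained from the Entityhub ValueFactory): each parsed language produces one language-tagged literal, and a call without languages produces a single literal without a language tag.

    // given: a Representation 'rep' obtained from the Entityhub ValueFactory
    String labelField = "http://www.w3.org/2000/01/rdf-schema#label";
    // one language-tagged literal per parsed language
    rep.addNaturalText(labelField, "Vienna", "en");
    rep.addNaturalText(labelField, "Wien", "de", "de-AT");
    // no languages parsed: a single literal without a language tag
    rep.addNaturalText(labelField, "Wien/Vienna");
    // read the values back as natural language texts
    Iterator<Text> germanLabels = rep.get(labelField, "de", "de-AT");
    while (germanLabels.hasNext()) {
        Text label = germanLabels.next();
        System.out.println(label.getLanguage() + ": " + label.getText());
    }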
Use of org.openrdf.model.Literal in project Anserini by castorini.
The class TrainingDataGenerator, method birthdate.
/**
 * Generates training data for the birth date property.
 * <p>
 * Note: this function might need some refactoring when we add more properties.
 */
void birthdate() throws ParseException, IOException {
    QueryParser queryParser = new QueryParser(FIELD_BIRTHDATE, getKbIndexAnalyzer());
    queryParser.setAllowLeadingWildcard(true);
    Query q = queryParser.parse("*");
    LOG.info("Starting the search using query: {}", q.toString());
    // Collect all matching documents in a set of matching doc ids
    Set<Integer> matchingDocIds = new HashSet<>();
    getKbIndexSearcher().search(q, new CheckHits.SetCollector(matchingDocIds));
    LOG.info("Found {} matching documents, retrieving...", matchingDocIds.size());
    // Process the retrieved document ids
    matchingDocIds.forEach((Integer docId) -> {
        Document doc = null;
        try {
            doc = getKbIndexReader().document(docId);
        } catch (IOException e) {
            LOG.warn("Error retrieving document with id: {}. Ignoring.", docId);
            return;
        }
        String freebaseURI = doc.get(ObjectTriplesLuceneDocumentGenerator.FIELD_SUBJECT);
        // We might have multiple values for the field
        String[] birthdates = doc.getValues(FIELD_BIRTHDATE);
        // Get the Freebase English label of this entity
        String[] labels = doc.getValues(FIELD_LABEL);
        String englishLabel = null;
        for (String label : labels) {
            Literal literal = NTriplesUtil.parseLiteral(label, valueFactory);
            if (literal.getLanguage().orElse("N/A").toLowerCase().equals("en")) {
                englishLabel = literal.stringValue();
                break;
            }
        }
        // Skip entities that lack an English label, a subject URI or birth date values
        if (englishLabel == null || freebaseURI == null || birthdates == null || birthdates.length == 0) {
            return;
        }
        String freebaseId = freebaseUriToFreebaseId(freebaseURI);
        for (String birthdate : birthdates) {
            // Get the string value of the typed literal
            String birthdateVal = extractValueFromTypedLiteralString(birthdate);
            // Write the property value as training data
            writeToTrainingFile(TRAINING_DATA_OUTPUT_FILE_EXAMPLES, freebaseId, englishLabel, birthdateVal);
        }
        // TODO - After building an index for the mentions of Freebase entities in ClueWeb,
        // we need to get the ClueWeb mentions of this Freebase entity and write them to a separate file
    });
}
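The English-label lookup inside the lambda can also be read on its own. The sketch below pulls that step into a hypothetical helper (firstEnglishLabel is a made-up name; the original code performs this inline) using the same org.openrdf NTriplesUtil and Literal APIs shown above.

    // requires org.openrdf.model.Literal, org.openrdf.model.ValueFactory,
    // org.openrdf.rio.ntriples.NTriplesUtil and java.util.Optional
    static Optional<String> firstEnglishLabel(String[] ntriplesLabels, ValueFactory valueFactory) {
        for (String label : ntriplesLabels) {
            // labels are stored N-Triples encoded, e.g. "\"Vienna\"@en"
            Literal literal = NTriplesUtil.parseLiteral(label, valueFactory);
            if (literal.getLanguage().orElse("").equalsIgnoreCase("en")) {
                return Optional.of(literal.stringValue());
            }
        }
        return Optional.empty();
    }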