Search in sources :

Example 1 with SKOSAnalyzer

use of at.ac.univie.mminf.luceneSKOS.analysis.SKOSAnalyzer in project lucene-skos by behas.

the class SKOSLabelFilterTest method setUp.

/**
 * Prepares the fixture: runs the inherited set-up, then opens an index writer
 * whose analyzer is a {@link SKOSAnalyzer} configured for label expansion.
 *
 * @throws Exception if the inherited set-up or opening the writer fails
 */
@Before
@Override
public void setUp() throws Exception {
    super.setUp();
    // skosEngine and directory are not assigned in this method;
    // presumably super.setUp() initializes them -- verify against the base class.
    skosAnalyzer = new SKOSAnalyzer(skosEngine, SKOSAnalyzer.ExpansionType.LABEL);
    IndexWriterConfig writerConfig = new IndexWriterConfig(skosAnalyzer);
    writer = new IndexWriter(directory, writerConfig);
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) SKOSAnalyzer(at.ac.univie.mminf.luceneSKOS.analysis.SKOSAnalyzer) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Before(org.junit.Before)

Example 2 with SKOSAnalyzer

use of at.ac.univie.mminf.luceneSKOS.analysis.SKOSAnalyzer in project lucene-skos by behas.

the class SKOSURIFilterTest method setUp.

/**
 * Prepares the fixture: runs the inherited set-up, then opens an index writer
 * whose analyzer is a {@link SKOSAnalyzer} configured for URI expansion.
 *
 * @throws Exception if the inherited set-up or opening the writer fails
 */
@Before
@Override
public void setUp() throws Exception {
    super.setUp();
    // skosEngine and directory are not assigned in this method;
    // presumably super.setUp() initializes them -- verify against the base class.
    skosAnalyzer = new SKOSAnalyzer(skosEngine, ExpansionType.URI);
    IndexWriterConfig writerConfig = new IndexWriterConfig(skosAnalyzer);
    writer = new IndexWriter(directory, writerConfig);
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) SKOSAnalyzer(at.ac.univie.mminf.luceneSKOS.analysis.SKOSAnalyzer) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Before(org.junit.Before)

Example 3 with SKOSAnalyzer

use of at.ac.univie.mminf.luceneSKOS.analysis.SKOSAnalyzer in project lucene-skos by behas.

the class SKOSStandardQueryParserTest method setUp.

/**
 * Builds the fixture used by the query parser tests: a mock SKOS engine
 * holding seven concepts (each with a preferred label and one or two
 * alternative labels), an in-memory directory, and an index writer that
 * analyzes text with a label-expanding {@link SKOSAnalyzer}.
 *
 * @throws Exception if the index writer cannot be opened
 */
@Before
public void setUp() throws Exception {
    // adding some test data
    SKOSEngineMock thesaurus = new SKOSEngineMock();

    // concept 1
    thesaurus.addEntry("http://example.com/concept/1", SKOSType.PREF, "jumps");
    thesaurus.addEntry("http://example.com/concept/1", SKOSType.ALT, "leaps", "hops");
    // concept 2
    thesaurus.addEntry("http://example.com/concept/2", SKOSType.PREF, "quick");
    thesaurus.addEntry("http://example.com/concept/2", SKOSType.ALT, "fast", "speedy");
    // concept 3
    thesaurus.addEntry("http://example.com/concept/3", SKOSType.PREF, "over");
    thesaurus.addEntry("http://example.com/concept/3", SKOSType.ALT, "above");
    // concept 4
    thesaurus.addEntry("http://example.com/concept/4", SKOSType.PREF, "lazy");
    thesaurus.addEntry("http://example.com/concept/4", SKOSType.ALT, "apathic", "sluggish");
    // concept 5
    thesaurus.addEntry("http://example.com/concept/5", SKOSType.PREF, "dog");
    thesaurus.addEntry("http://example.com/concept/5", SKOSType.ALT, "canine", "pooch");
    // concept 6: multi-word preferred label
    thesaurus.addEntry("http://example.com/concept/6", SKOSType.PREF, "united nations");
    thesaurus.addEntry("http://example.com/concept/6", SKOSType.ALT, "UN");
    // concept 7: multi-word label overlapping the labels of concepts 4 and 5
    thesaurus.addEntry("http://example.com/concept/7", SKOSType.PREF, "lazy dog");
    thesaurus.addEntry("http://example.com/concept/7", SKOSType.ALT, "Odie");

    // publish the populated mock, then wire up the in-memory index
    skosEngine = thesaurus;
    directory = new RAMDirectory();
    skosAnalyzer = new SKOSAnalyzer(skosEngine, ExpansionType.LABEL);
    IndexWriterConfig writerConfig = new IndexWriterConfig(skosAnalyzer);
    writer = new IndexWriter(directory, writerConfig);
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) SKOSEngineMock(at.ac.univie.mminf.luceneSKOS.skos.engine.mock.SKOSEngineMock) RAMDirectory(org.apache.lucene.store.RAMDirectory) SKOSAnalyzer(at.ac.univie.mminf.luceneSKOS.analysis.SKOSAnalyzer) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Before(org.junit.Before)

Example 4 with SKOSAnalyzer

use of at.ac.univie.mminf.luceneSKOS.analysis.SKOSAnalyzer in project lucene-skos by behas.

the class URIbasedTermExpansionTest method uriBasedTermExpansion.

/**
 * Indexes one sample metadata record (a Lucene document) with "title",
 * "description" and "subject" fields, where the subject holds the URI of a
 * SKOS concept ("weapons" in the sample thesaurus) and is analyzed with a
 * URI-expanding SKOS analyzer.
 * <p>
 * The record is expected to be found by a search for "arms", which the
 * sample thesaurus defines as an alternative label (altLabel) of that
 * concept, and by a search for "military equipment" in the subject field.
 *
 * @throws IOException if building or searching the index fails
 */
@Test
public void uriBasedTermExpansion() throws IOException {
    /* the record to index; only the title is stored */
    String description = "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
            + "The spear was mainly a thrusting weapon, but could also be thrown. "
            + "It was the principal weapon of the auxiliary soldier... "
            + "(second - fourth century, Arbeia Roman Fort).";
    Document record = new Document();
    record.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    record.add(new Field("description", description, TextField.TYPE_NOT_STORED));
    record.add(new Field("subject", "http://www.ukat.org.uk/thesaurus/concept/859", TextField.TYPE_NOT_STORED));

    /* SKOS analyzer in URI mode: the analyzed field is expected to contain concept URIs */
    String thesaurusFile = "src/test/resources/skos_samples/ukat_examples.n3";
    String skosIndexPath = "build/";
    Analyzer uriExpander = new SKOSAnalyzer(skosIndexPath, thesaurusFile, ExpansionType.URI);

    /* only "subject" goes through the SKOS analyzer; other fields use SimpleAnalyzer */
    Map<String, Analyzer> perField = new HashMap<>();
    perField.put("subject", uriExpander);
    PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), perField);

    /* index the record into a fresh in-memory directory */
    RAMDirectory index = new RAMDirectory();
    IndexWriterConfig writerConfig = new IndexWriterConfig(indexAnalyzer);
    writer = new IndexWriter(index, writerConfig);
    writer.addDocument(record);

    /* "arms" in any of the three fields */
    BooleanQuery.Builder anyField = new BooleanQuery.Builder();
    for (String field : new String[] {"title", "description", "subject"}) {
        anyField.add(new TermQuery(new Term(field, "arms")), BooleanClause.Occur.SHOULD);
    }

    /* search through a reader opened directly on the writer */
    DirectoryReader reader = DirectoryReader.open(writer, false);
    searcher = new IndexSearcher(reader);
    TopDocs hits = searcher.search(anyField.build(), 10);
    /* the document matches because "arms" is among the expanded terms */
    assertEquals(1, hits.totalHits);

    /* a broader concept's label in the subject field also returns the document */
    Query broader = new TermQuery(new Term("subject", "military equipment"));
    hits = searcher.search(broader, 10);
    assertEquals(1, hits.totalHits);
}
Also used : HashMap(java.util.HashMap) SimpleAnalyzer(org.apache.lucene.analysis.core.SimpleAnalyzer) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Analyzer(org.apache.lucene.analysis.Analyzer) SKOSAnalyzer(at.ac.univie.mminf.luceneSKOS.analysis.SKOSAnalyzer) RAMDirectory(org.apache.lucene.store.RAMDirectory) PerFieldAnalyzerWrapper(org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Example 5 with SKOSAnalyzer

use of at.ac.univie.mminf.luceneSKOS.analysis.SKOSAnalyzer in project lucene-skos by behas.

the class LabelbasedTermExpansionTest method labelBasedTermExpansion.

/**
 * This test indexes a sample metadata record (=lucene document) having a
 * "title", "description", and "subject" field.
 * <p>
 * A search for "arms" returns that record as a result because "arms" is
 * defined as an alternative label for "weapons", the term which is
 * contained in the subject field.
 *
 * @throws IOException if building or searching the index fails
 */
@Test
public void labelBasedTermExpansion() throws IOException {
    /* defining the document to be indexed */
    Document doc = new Document();
    doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    doc.add(new Field("description",
            "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
                    + "The spear was mainly a thrusting weapon, but could also be thrown. "
                    + "It was the principal weapon of the auxiliary soldier... "
                    + "(second - fourth century, Arbeia Roman Fort).",
            TextField.TYPE_NOT_STORED));
    doc.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED));
    /* setting up the SKOS analyzer */
    String skosFile = "src/test/resources/skos_samples/ukat_examples.n3";
    String indexPath = "build/";
    /* ExpansionType.LABEL->the field to be analyzed (expanded) contains labels, not URIs */
    Analyzer skosAnalyzer = new SKOSAnalyzer(indexPath, skosFile, ExpansionType.LABEL);
    /* Define different analyzers for different fields */
    Map<String, Analyzer> analyzerPerField = new HashMap<>();
    analyzerPerField.put("subject", skosAnalyzer);
    PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), analyzerPerField);
    /* setting up a writer with the per-field analyzer (SimpleAnalyzer is only the fallback) */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(indexAnalyzer));
    /* adding the document to the index */
    writer.addDocument(doc);
    /* defining a query that searches over all fields */
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD);
    builder.add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD);
    builder.add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD);
    /* creating a new searcher on a reader opened directly from the writer */
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));
    TopDocs results = searcher.search(builder.build(), 10);
    /* the document matches because "arms" is among the expanded terms */
    assertEquals(1, results.totalHits);
    /* defining a query that searches for a broader concept */
    Query query = new TermQuery(new Term("subject", "military equipment"));
    results = searcher.search(query, 10);
    /* ... also returns the document as result */
    assertEquals(1, results.totalHits);
}
Also used : HashMap(java.util.HashMap) SimpleAnalyzer(org.apache.lucene.analysis.core.SimpleAnalyzer) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Analyzer(org.apache.lucene.analysis.Analyzer) SKOSAnalyzer(at.ac.univie.mminf.luceneSKOS.analysis.SKOSAnalyzer) RAMDirectory(org.apache.lucene.store.RAMDirectory) PerFieldAnalyzerWrapper(org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test)

Aggregations

SKOSAnalyzer (at.ac.univie.mminf.luceneSKOS.analysis.SKOSAnalyzer)5 IndexWriter (org.apache.lucene.index.IndexWriter)5 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)5 RAMDirectory (org.apache.lucene.store.RAMDirectory)3 Before (org.junit.Before)3 HashMap (java.util.HashMap)2 Analyzer (org.apache.lucene.analysis.Analyzer)2 SimpleAnalyzer (org.apache.lucene.analysis.core.SimpleAnalyzer)2 PerFieldAnalyzerWrapper (org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper)2 Document (org.apache.lucene.document.Document)2 Field (org.apache.lucene.document.Field)2 TextField (org.apache.lucene.document.TextField)2 Term (org.apache.lucene.index.Term)2 Test (org.junit.Test)2 SKOSEngineMock (at.ac.univie.mminf.luceneSKOS.skos.engine.mock.SKOSEngineMock)1