Examples with TaxonomyWriter - org.apache.lucene.facet.taxonomy.TaxonomyWriter

Example 1 with TaxonomyWriter

use of org.apache.lucene.facet.taxonomy.TaxonomyWriter in project lucene-solr by apache.

the class TestMultipleIndexFields method testSomeSameSomeDifferent.

@Test
public void testSomeSameSomeDifferent() throws Exception {
    Directory indexDir = newDirectory();
    Directory taxoDir = newDirectory();
    // create and open an index writer
    RandomIndexWriter iw = new RandomIndexWriter(random(), indexDir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
    // create and open a taxonomy writer
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
    FacetsConfig config = getConfig();
    config.setIndexFieldName("Band", "$music");
    config.setIndexFieldName("Composer", "$music");
    config.setIndexFieldName("Author", "$literature");
    seedIndex(tw, iw, config);
    IndexReader ir = iw.getReader();
    tw.commit();
    // prepare index reader and taxonomy.
    TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);
    // prepare searcher to search against
    IndexSearcher searcher = newSearcher(ir);
    FacetsCollector sfc = performSearch(tr, ir, searcher);
    Map<String, Facets> facetsMap = new HashMap<>();
    Facets facets2 = getTaxonomyFacetCounts(tr, config, sfc, "$music");
    facetsMap.put("Band", facets2);
    facetsMap.put("Composer", facets2);
    facetsMap.put("Author", getTaxonomyFacetCounts(tr, config, sfc, "$literature"));
    Facets facets = new MultiFacets(facetsMap, getTaxonomyFacetCounts(tr, config, sfc));
    // Obtain facets results and hand-test them
    assertCorrectResults(facets);
    assertOrdinalsExist("$music", ir);
    assertOrdinalsExist("$literature", ir);
    iw.close();
    IOUtils.close(tr, ir, iw, tw, indexDir, taxoDir);
}

Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) HashMap(java.util.HashMap) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) TaxonomyWriter(org.apache.lucene.facet.taxonomy.TaxonomyWriter) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader) Test(org.junit.Test)

Example 2 with TaxonomyWriter

use of org.apache.lucene.facet.taxonomy.TaxonomyWriter in project lucene-solr by apache.

the class TestMultipleIndexFields method testCustom.

@Test
public void testCustom() throws Exception {
    Directory indexDir = newDirectory();
    Directory taxoDir = newDirectory();
    // create and open an index writer
    RandomIndexWriter iw = new RandomIndexWriter(random(), indexDir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
    // create and open a taxonomy writer
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
    FacetsConfig config = getConfig();
    config.setIndexFieldName("Author", "$author");
    seedIndex(tw, iw, config);
    IndexReader ir = iw.getReader();
    tw.commit();
    // prepare index reader and taxonomy.
    TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);
    // prepare searcher to search against
    IndexSearcher searcher = newSearcher(ir);
    FacetsCollector sfc = performSearch(tr, ir, searcher);
    Map<String, Facets> facetsMap = new HashMap<>();
    facetsMap.put("Author", getTaxonomyFacetCounts(tr, config, sfc, "$author"));
    Facets facets = new MultiFacets(facetsMap, getTaxonomyFacetCounts(tr, config, sfc));
    // Obtain facets results and hand-test them
    assertCorrectResults(facets);
    assertOrdinalsExist("$facets", ir);
    assertOrdinalsExist("$author", ir);
    iw.close();
    IOUtils.close(tr, ir, tw, indexDir, taxoDir);
}

Example 3 with TaxonomyWriter

use of org.apache.lucene.facet.taxonomy.TaxonomyWriter in project lucene-solr by apache.

the class TestDirectoryTaxonomyReader method doTestReadRecreatedTaxonomy.

private void doTestReadRecreatedTaxonomy(Random random, boolean closeReader) throws Exception {
    Directory dir = null;
    TaxonomyWriter tw = null;
    TaxonomyReader tr = null;
    // prepare a few categories
    int n = 10;
    FacetLabel[] cp = new FacetLabel[n];
    for (int i = 0; i < n; i++) {
        cp[i] = new FacetLabel("a", Integer.toString(i));
    }
    try {
        dir = newDirectory();
        tw = new DirectoryTaxonomyWriter(dir);
        tw.addCategory(new FacetLabel("a"));
        tw.close();
        tr = new DirectoryTaxonomyReader(dir);
        int baseNumCategories = tr.getSize();
        for (int i = 0; i < n; i++) {
            int k = random.nextInt(n);
            tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
            for (int j = 0; j <= k; j++) {
                tw.addCategory(cp[j]);
            }
            tw.close();
            if (closeReader) {
                tr.close();
                tr = new DirectoryTaxonomyReader(dir);
            } else {
                TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
                assertNotNull(newtr);
                tr.close();
                tr = newtr;
            }
            assertEquals("Wrong #categories in taxonomy (i=" + i + ", k=" + k + ")", baseNumCategories + 1 + k, tr.getSize());
        }
    } finally {
        IOUtils.close(tr, tw, dir);
    }
}

Also used : TaxonomyWriter(org.apache.lucene.facet.taxonomy.TaxonomyWriter) TaxonomyReader(org.apache.lucene.facet.taxonomy.TaxonomyReader) FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory)

Example 4 with TaxonomyWriter

use of org.apache.lucene.facet.taxonomy.TaxonomyWriter in project searchcode-server by boyter.

the class CodeIndexer method indexTimeDocuments.

/**
     * Given a queue of documents to index, index them by popping the queue limited to 1000 items.
     * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
     * index.
     * TODO investigate how Lucene deals with multiple writes
     */
public synchronized void indexTimeDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
    // Index all documents and commit at the end for performance gains
    Directory dir = FSDirectory.open(Paths.get(Properties.getProperties().getProperty(Values.TIMEINDEXLOCATION, Values.DEFAULTTIMEINDEXLOCATION)));
    Directory facetsdir = FSDirectory.open(Paths.get(Properties.getProperties().getProperty(Values.TIMEINDEXFACETLOCATION, Values.DEFAULTTIMEINDEXFACETLOCATION)));
    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    FacetsConfig facetsConfig;
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(dir, iwc);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(facetsdir);
    try {
        CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();
        int count = 0;
        while (codeIndexDocument != null) {
            Singleton.getLogger().info("Indexing time file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
            this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());
            Document doc = new Document();
            // Path is the primary key for documents
            // needs to include repo location, project name and then filepath including file and revision
            Field pathField = new StringField("path", codeIndexDocument.getRepoLocationRepoNameLocationFilename() + ":" + codeIndexDocument.getRevision(), Field.Store.YES);
            doc.add(pathField);
            // Add in facets
            facetsConfig = new FacetsConfig();
            facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
            facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
            facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);
            facetsConfig.setIndexFieldName(Values.DATEYEARMONTHDAY, Values.DATEYEARMONTHDAY);
            facetsConfig.setIndexFieldName(Values.DATEYEARMONTH, Values.DATEYEARMONTH);
            facetsConfig.setIndexFieldName(Values.DATEYEAR, Values.DATEYEAR);
            facetsConfig.setIndexFieldName(Values.REVISION, Values.REVISION);
            facetsConfig.setIndexFieldName(Values.DELETED, Values.DELETED);
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getLanguageName()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRepoName()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.REPONAME, codeIndexDocument.getRepoName()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getCodeOwner()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.CODEOWNER, codeIndexDocument.getCodeOwner()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonthDay().substring(0, 6)));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DATEYEAR, codeIndexDocument.getYearMonthDay().substring(0, 4)));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRevision()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.REVISION, codeIndexDocument.getRevision()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.isDeleted()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DELETED, codeIndexDocument.isDeleted()));
            }
            String indexContents = Values.EMPTYSTRING;
            indexContents += this.searchcodeLib.splitKeywords(codeIndexDocument.getContents());
            indexContents += this.searchcodeLib.codeCleanPipeline(codeIndexDocument.getContents());
            // Store in spelling corrector
            this.searchcodeLib.addToSpellingCorrector(codeIndexDocument.getContents());
            indexContents = indexContents.toLowerCase();
            doc.add(new TextField(Values.REPONAME, codeIndexDocument.getRepoName(), Field.Store.YES));
            doc.add(new TextField(Values.FILENAME, codeIndexDocument.getFileName(), Field.Store.YES));
            doc.add(new TextField(Values.FILELOCATION, codeIndexDocument.getFileLocation(), Field.Store.YES));
            doc.add(new TextField(Values.FILELOCATIONFILENAME, codeIndexDocument.getFileLocationFilename(), Field.Store.YES));
            doc.add(new TextField(Values.MD5HASH, codeIndexDocument.getMd5hash(), Field.Store.YES));
            doc.add(new TextField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName(), Field.Store.YES));
            doc.add(new IntField(Values.CODELINES, codeIndexDocument.getCodeLines(), Field.Store.YES));
            doc.add(new TextField(Values.CONTENTS, indexContents, Field.Store.NO));
            doc.add(new TextField(Values.REPOLOCATION, codeIndexDocument.getRepoRemoteLocation(), Field.Store.YES));
            doc.add(new TextField(Values.CODEOWNER, codeIndexDocument.getCodeOwner(), Field.Store.YES));
            doc.add(new TextField(Values.REVISION, codeIndexDocument.getRevision(), Field.Store.YES));
            doc.add(new TextField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay(), Field.Store.YES));
            doc.add(new TextField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonth(), Field.Store.YES));
            doc.add(new TextField(Values.DATEYEAR, codeIndexDocument.getYear(), Field.Store.YES));
            doc.add(new TextField(Values.MESSAGE, codeIndexDocument.getMessage(), Field.Store.YES));
            doc.add(new TextField(Values.DELETED, codeIndexDocument.isDeleted(), Field.Store.YES));
            // Extra metadata in this case when it was last indexed
            doc.add(new LongField(Values.MODIFIED, new Date().getTime(), Field.Store.YES));
            writer.updateDocument(new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxoWriter, doc));
            count++;
            if (count >= INDEX_QUEUE_BATCH_SIZE) {
                codeIndexDocument = null;
            } else {
                codeIndexDocument = codeIndexDocumentQueue.poll();
            }
        }
    } finally {
        Singleton.getLogger().info("Closing writers");
        writer.close();
        taxoWriter.close();
    }
}

Also used : FacetsConfig(org.apache.lucene.facet.FacetsConfig) Term(org.apache.lucene.index.Term) CodeAnalyzer(com.searchcode.app.util.CodeAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) CodeIndexDocument(com.searchcode.app.dto.CodeIndexDocument) Date(java.util.Date) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) CodeAnalyzer(com.searchcode.app.util.CodeAnalyzer) TaxonomyWriter(org.apache.lucene.facet.taxonomy.TaxonomyWriter) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) IndexWriter(org.apache.lucene.index.IndexWriter) CodeIndexDocument(com.searchcode.app.dto.CodeIndexDocument) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 5 with TaxonomyWriter

use of org.apache.lucene.facet.taxonomy.TaxonomyWriter in project lucene-solr by apache.

the class TestMultipleIndexFields method testTwoCustomsSameField.

@Test
public void testTwoCustomsSameField() throws Exception {
    Directory indexDir = newDirectory();
    Directory taxoDir = newDirectory();
    // create and open an index writer
    RandomIndexWriter iw = new RandomIndexWriter(random(), indexDir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
    // create and open a taxonomy writer
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
    FacetsConfig config = getConfig();
    config.setIndexFieldName("Band", "$music");
    config.setIndexFieldName("Composer", "$music");
    seedIndex(tw, iw, config);
    IndexReader ir = iw.getReader();
    tw.commit();
    // prepare index reader and taxonomy.
    TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);
    // prepare searcher to search against
    IndexSearcher searcher = newSearcher(ir);
    FacetsCollector sfc = performSearch(tr, ir, searcher);
    Map<String, Facets> facetsMap = new HashMap<>();
    Facets facets2 = getTaxonomyFacetCounts(tr, config, sfc, "$music");
    facetsMap.put("Band", facets2);
    facetsMap.put("Composer", facets2);
    Facets facets = new MultiFacets(facetsMap, getTaxonomyFacetCounts(tr, config, sfc));
    // Obtain facets results and hand-test them
    assertCorrectResults(facets);
    assertOrdinalsExist("$facets", ir);
    assertOrdinalsExist("$music", ir);
    assertOrdinalsExist("$music", ir);
    iw.close();
    IOUtils.close(tr, ir, tw, indexDir, taxoDir);
}

Aggregations

TaxonomyWriter (org.apache.lucene.facet.taxonomy.TaxonomyWriter)10 DirectoryTaxonomyWriter (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter)9 Directory (org.apache.lucene.store.Directory)9 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)6 TaxonomyReader (org.apache.lucene.facet.taxonomy.TaxonomyReader)6 DirectoryTaxonomyReader (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader)6 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)6 IndexReader (org.apache.lucene.index.IndexReader)5 IndexSearcher (org.apache.lucene.search.IndexSearcher)5 Test (org.junit.Test)5 HashMap (java.util.HashMap)4 Analyzer (org.apache.lucene.analysis.Analyzer)3 FSDirectory (org.apache.lucene.store.FSDirectory)3 CodeIndexDocument (com.searchcode.app.dto.CodeIndexDocument)2 CodeAnalyzer (com.searchcode.app.util.CodeAnalyzer)2 FacetsConfig (org.apache.lucene.facet.FacetsConfig)2 IndexWriter (org.apache.lucene.index.IndexWriter)2 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)2 Term (org.apache.lucene.index.Term)2 IOException (java.io.IOException)1