Search in sources :

Example 26 with FacetsConfig

use of org.apache.lucene.facet.FacetsConfig in project lucene-solr by apache.

the class TestTaxonomyFacetSumValueSource method testCountAndSumScore.

public void testCountAndSumScore() throws Exception {
    Directory indexDir = newDirectory();
    Directory taxoDir = newDirectory();
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(new MockAnalyzer(random())));
    FacetsConfig config = new FacetsConfig();
    config.setIndexFieldName("b", "$b");
    for (int i = atLeast(30); i > 0; --i) {
        Document doc = new Document();
        doc.add(new StringField("f", "v", Field.Store.NO));
        doc.add(new FacetField("a", "1"));
        doc.add(new FacetField("b", "1"));
        iw.addDocument(config.build(taxoWriter, doc));
    }
    DirectoryReader r = DirectoryReader.open(iw);
    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    FacetsCollector fc = new FacetsCollector(true);
    FacetsCollector.search(newSearcher(r), new MatchAllDocsQuery(), 10, fc);
    Facets facets1 = getTaxonomyFacetCounts(taxoReader, config, fc);
    Facets facets2 = new TaxonomyFacetSumValueSource(new DocValuesOrdinalsReader("$b"), taxoReader, config, fc, DoubleValuesSource.SCORES);
    assertEquals(r.maxDoc(), facets1.getTopChildren(10, "a").value.intValue());
    assertEquals(r.maxDoc(), facets2.getTopChildren(10, "b").value.doubleValue(), 1E-10);
    iw.close();
    IOUtils.close(taxoWriter, taxoReader, taxoDir, r, indexDir);
}
Also used : FacetsConfig(org.apache.lucene.facet.FacetsConfig) Facets(org.apache.lucene.facet.Facets) DirectoryReader(org.apache.lucene.index.DirectoryReader) FacetField(org.apache.lucene.facet.FacetField) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) FacetsCollector(org.apache.lucene.facet.FacetsCollector) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) StringField(org.apache.lucene.document.StringField) Directory(org.apache.lucene.store.Directory) DirectoryTaxonomyReader(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader)

Example 27 with FacetsConfig

use of org.apache.lucene.facet.FacetsConfig in project lucene-solr by apache.

the class TestTaxonomyFacetCounts2 method getConfig.

private static FacetsConfig getConfig() {
    FacetsConfig config = new FacetsConfig();
    config.setMultiValued("A", true);
    config.setMultiValued("B", true);
    config.setRequireDimCount("B", true);
    config.setHierarchical("D", true);
    return config;
}
Also used : FacetsConfig(org.apache.lucene.facet.FacetsConfig)

Example 28 with FacetsConfig

use of org.apache.lucene.facet.FacetsConfig in project lucene-solr by apache.

the class IndexAndTaxonomyRevisionTest method newDocument.

private Document newDocument(TaxonomyWriter taxoWriter) throws IOException {
    FacetsConfig config = new FacetsConfig();
    Document doc = new Document();
    doc.add(new FacetField("A", "1"));
    return config.build(taxoWriter, doc);
}
Also used : FacetsConfig(org.apache.lucene.facet.FacetsConfig) FacetField(org.apache.lucene.facet.FacetField) Document(org.apache.lucene.document.Document)

Example 29 with FacetsConfig

use of org.apache.lucene.facet.FacetsConfig in project searchcode-server by boyter.

the class CodeIndexer method indexTimeDocuments.

/**
     * Given a queue of documents to index, index them by popping the queue limited to 1000 items.
     * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
     * index.
     * TODO investigate how Lucene deals with multiple writes
     */
public synchronized void indexTimeDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
    // Index all documents and commit at the end for performance gains
    Directory dir = FSDirectory.open(Paths.get(Properties.getProperties().getProperty(Values.TIMEINDEXLOCATION, Values.DEFAULTTIMEINDEXLOCATION)));
    Directory facetsdir = FSDirectory.open(Paths.get(Properties.getProperties().getProperty(Values.TIMEINDEXFACETLOCATION, Values.DEFAULTTIMEINDEXFACETLOCATION)));
    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    FacetsConfig facetsConfig;
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(dir, iwc);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(facetsdir);
    try {
        CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();
        int count = 0;
        while (codeIndexDocument != null) {
            Singleton.getLogger().info("Indexing time file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
            this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());
            Document doc = new Document();
            // Path is the primary key for documents
            // needs to include repo location, project name and then filepath including file and revision
            Field pathField = new StringField("path", codeIndexDocument.getRepoLocationRepoNameLocationFilename() + ":" + codeIndexDocument.getRevision(), Field.Store.YES);
            doc.add(pathField);
            // Add in facets
            facetsConfig = new FacetsConfig();
            facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
            facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
            facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);
            facetsConfig.setIndexFieldName(Values.DATEYEARMONTHDAY, Values.DATEYEARMONTHDAY);
            facetsConfig.setIndexFieldName(Values.DATEYEARMONTH, Values.DATEYEARMONTH);
            facetsConfig.setIndexFieldName(Values.DATEYEAR, Values.DATEYEAR);
            facetsConfig.setIndexFieldName(Values.REVISION, Values.REVISION);
            facetsConfig.setIndexFieldName(Values.DELETED, Values.DELETED);
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getLanguageName()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRepoName()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.REPONAME, codeIndexDocument.getRepoName()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getCodeOwner()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.CODEOWNER, codeIndexDocument.getCodeOwner()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonthDay().substring(0, 6)));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DATEYEAR, codeIndexDocument.getYearMonthDay().substring(0, 4)));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRevision()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.REVISION, codeIndexDocument.getRevision()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.isDeleted()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DELETED, codeIndexDocument.isDeleted()));
            }
            String indexContents = Values.EMPTYSTRING;
            indexContents += this.searchcodeLib.splitKeywords(codeIndexDocument.getContents());
            indexContents += this.searchcodeLib.codeCleanPipeline(codeIndexDocument.getContents());
            // Store in spelling corrector
            this.searchcodeLib.addToSpellingCorrector(codeIndexDocument.getContents());
            indexContents = indexContents.toLowerCase();
            doc.add(new TextField(Values.REPONAME, codeIndexDocument.getRepoName(), Field.Store.YES));
            doc.add(new TextField(Values.FILENAME, codeIndexDocument.getFileName(), Field.Store.YES));
            doc.add(new TextField(Values.FILELOCATION, codeIndexDocument.getFileLocation(), Field.Store.YES));
            doc.add(new TextField(Values.FILELOCATIONFILENAME, codeIndexDocument.getFileLocationFilename(), Field.Store.YES));
            doc.add(new TextField(Values.MD5HASH, codeIndexDocument.getMd5hash(), Field.Store.YES));
            doc.add(new TextField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName(), Field.Store.YES));
            doc.add(new IntField(Values.CODELINES, codeIndexDocument.getCodeLines(), Field.Store.YES));
            doc.add(new TextField(Values.CONTENTS, indexContents, Field.Store.NO));
            doc.add(new TextField(Values.REPOLOCATION, codeIndexDocument.getRepoRemoteLocation(), Field.Store.YES));
            doc.add(new TextField(Values.CODEOWNER, codeIndexDocument.getCodeOwner(), Field.Store.YES));
            doc.add(new TextField(Values.REVISION, codeIndexDocument.getRevision(), Field.Store.YES));
            doc.add(new TextField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay(), Field.Store.YES));
            doc.add(new TextField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonth(), Field.Store.YES));
            doc.add(new TextField(Values.DATEYEAR, codeIndexDocument.getYear(), Field.Store.YES));
            doc.add(new TextField(Values.MESSAGE, codeIndexDocument.getMessage(), Field.Store.YES));
            doc.add(new TextField(Values.DELETED, codeIndexDocument.isDeleted(), Field.Store.YES));
            // Extra metadata in this case when it was last indexed
            doc.add(new LongField(Values.MODIFIED, new Date().getTime(), Field.Store.YES));
            writer.updateDocument(new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxoWriter, doc));
            count++;
            if (count >= INDEX_QUEUE_BATCH_SIZE) {
                codeIndexDocument = null;
            } else {
                codeIndexDocument = codeIndexDocumentQueue.poll();
            }
        }
    } finally {
        Singleton.getLogger().info("Closing writers");
        writer.close();
        taxoWriter.close();
    }
}
Also used : FacetsConfig(org.apache.lucene.facet.FacetsConfig) Term(org.apache.lucene.index.Term) CodeAnalyzer(com.searchcode.app.util.CodeAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) CodeIndexDocument(com.searchcode.app.dto.CodeIndexDocument) Date(java.util.Date) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) CodeAnalyzer(com.searchcode.app.util.CodeAnalyzer) TaxonomyWriter(org.apache.lucene.facet.taxonomy.TaxonomyWriter) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) IndexWriter(org.apache.lucene.index.IndexWriter) CodeIndexDocument(com.searchcode.app.dto.CodeIndexDocument) SortedSetDocValuesFacetField(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 30 with FacetsConfig

use of org.apache.lucene.facet.FacetsConfig in project lucene-solr by apache.

the class TestConcurrentFacetedIndexing method testConcurrency.

public void testConcurrency() throws Exception {
    final AtomicInteger numDocs = new AtomicInteger(atLeast(10000));
    final Directory indexDir = newDirectory();
    final Directory taxoDir = newDirectory();
    final ConcurrentHashMap<String, String> values = new ConcurrentHashMap<>();
    final IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(null));
    final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, newTaxoWriterCache(numDocs.get()));
    final Thread[] indexThreads = new Thread[atLeast(4)];
    final FacetsConfig config = new FacetsConfig();
    for (int i = 0; i < 10; i++) {
        config.setHierarchical("l1." + i, true);
        config.setMultiValued("l1." + i, true);
    }
    for (int i = 0; i < indexThreads.length; i++) {
        indexThreads[i] = new Thread() {

            @Override
            public void run() {
                Random random = random();
                while (numDocs.decrementAndGet() > 0) {
                    try {
                        Document doc = new Document();
                        // 1-3
                        int numCats = random.nextInt(3) + 1;
                        while (numCats-- > 0) {
                            FacetField ff = newCategory();
                            doc.add(ff);
                            FacetLabel label = new FacetLabel(ff.dim, ff.path);
                            // add all prefixes to values
                            int level = label.length;
                            while (level > 0) {
                                String s = FacetsConfig.pathToString(label.components, level);
                                values.put(s, s);
                                --level;
                            }
                        }
                        iw.addDocument(config.build(tw, doc));
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }
            }
        };
    }
    for (Thread t : indexThreads) t.start();
    for (Thread t : indexThreads) t.join();
    DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(tw);
    // +1 for root category
    if (values.size() + 1 != tr.getSize()) {
        for (String value : values.keySet()) {
            FacetLabel label = new FacetLabel(FacetsConfig.stringToPath(value));
            if (tr.getOrdinal(label) == -1) {
                System.out.println("FAIL: path=" + label + " not recognized");
            }
        }
        fail("mismatch number of categories");
    }
    int[] parents = tr.getParallelTaxonomyArrays().parents();
    for (String cat : values.keySet()) {
        FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat));
        assertTrue("category not found " + cp, tr.getOrdinal(cp) > 0);
        int level = cp.length;
        // for root, parent is always virtual ROOT (ord=0)
        int parentOrd = 0;
        FacetLabel path = null;
        for (int i = 0; i < level; i++) {
            path = cp.subpath(i + 1);
            int ord = tr.getOrdinal(path);
            assertEquals("invalid parent for cp=" + path, parentOrd, parents[ord]);
            // next level should have this parent
            parentOrd = ord;
        }
    }
    iw.close();
    IOUtils.close(tw, tr, taxoDir, indexDir);
}
Also used : FacetsConfig(org.apache.lucene.facet.FacetsConfig) FacetLabel(org.apache.lucene.facet.taxonomy.FacetLabel) FacetField(org.apache.lucene.facet.FacetField) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) Random(java.util.Random) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IndexWriter(org.apache.lucene.index.IndexWriter) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Directory(org.apache.lucene.store.Directory)

Aggregations

FacetsConfig (org.apache.lucene.facet.FacetsConfig)53 Document (org.apache.lucene.document.Document)44 Directory (org.apache.lucene.store.Directory)44 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)38 DirectoryTaxonomyWriter (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter)35 FacetField (org.apache.lucene.facet.FacetField)29 Facets (org.apache.lucene.facet.Facets)29 DirectoryTaxonomyReader (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader)24 IndexSearcher (org.apache.lucene.search.IndexSearcher)23 FacetsCollector (org.apache.lucene.facet.FacetsCollector)22 FacetResult (org.apache.lucene.facet.FacetResult)18 IndexWriter (org.apache.lucene.index.IndexWriter)18 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)17 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)16 DirectoryReader (org.apache.lucene.index.DirectoryReader)12 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)8 DrillDownQuery (org.apache.lucene.facet.DrillDownQuery)8 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)8 LabelAndValue (org.apache.lucene.facet.LabelAndValue)7 Term (org.apache.lucene.index.Term)6