Search in sources :

Example 81 with DirectoryTaxonomyWriter

use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by apache.

the class CreateTaxonomyIndexTask method doLogic.

/**
 * Opens a new {@link DirectoryTaxonomyWriter} in {@code OpenMode.CREATE} on the
 * taxonomy directory held by the run data, and registers it on that run data.
 *
 * @return always 1
 * @throws IOException propagated from the calls made while opening and registering the writer
 */
@Override
public int doLogic() throws IOException {
    final PerfRunData perfRunData = getRunData();
    final DirectoryTaxonomyWriter freshWriter =
            new DirectoryTaxonomyWriter(perfRunData.getTaxonomyDir(), OpenMode.CREATE);
    perfRunData.setTaxonomyWriter(freshWriter);
    return 1;
}
Also used : DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) PerfRunData(org.apache.lucene.benchmark.byTask.PerfRunData)

Example 82 with DirectoryTaxonomyWriter

use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by apache.

the class AssociationsFacetsExample method index.

/**
 * Build the example index: two documents carrying int-valued {@code tags}
 * associations and float-valued {@code genre} associations.
 *
 * @throws IOException if opening, writing to, or closing either writer fails
 */
private void index() throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE);
    // try-with-resources closes both writers even when building or adding a
    // document throws. Resources are closed in reverse declaration order, so
    // the taxonomy writer is closed before the index writer.
    try (IndexWriter indexWriter = new IndexWriter(indexDir, iwc);
         // Writes facet ords to a separate directory from the main index
         DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir)) {
        Document doc = new Document();
        // 3 occurrences for tag 'lucene'
        doc.add(new IntAssociationFacetField(3, "tags", "lucene"));
        // 87% confidence level of genre 'computing'
        doc.add(new FloatAssociationFacetField(0.87f, "genre", "computing"));
        indexWriter.addDocument(config.build(taxoWriter, doc));

        doc = new Document();
        // 1 occurrence for tag 'lucene'
        doc.add(new IntAssociationFacetField(1, "tags", "lucene"));
        // 2 occurrences for tag 'solr'
        doc.add(new IntAssociationFacetField(2, "tags", "solr"));
        // 75% confidence level of genre 'computing'
        doc.add(new FloatAssociationFacetField(0.75f, "genre", "computing"));
        // 34% confidence level of genre 'software'
        doc.add(new FloatAssociationFacetField(0.34f, "genre", "software"));
        indexWriter.addDocument(config.build(taxoWriter, doc));
    }
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) IndexWriter(org.apache.lucene.index.IndexWriter) FloatAssociationFacetField(org.apache.lucene.facet.taxonomy.FloatAssociationFacetField) Document(org.apache.lucene.document.Document) IntAssociationFacetField(org.apache.lucene.facet.taxonomy.IntAssociationFacetField) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 83 with DirectoryTaxonomyWriter

use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project searchcode-server by boyter.

the class CodeIndexer method indexDocuments.

/**
 * Given a queue of documents to index, index them by popping the queue, stopping once the queue is
 * empty or INDEX_QUEUE_BATCH_SIZE documents have been handled in this call.
 * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
 * index.
 * TODO investigate how Lucene deals with multiple writes
 *
 * @param codeIndexDocumentQueue queue that documents are polled from
 * @throws IOException if a directory or writer cannot be opened, written to, or closed
 */
public synchronized void indexDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    // The facet configuration is the same for every document, so build it once
    // instead of once per loop iteration.
    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
    facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
    facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);

    // try-with-resources releases everything that was successfully opened, including
    // the index writer when opening the taxonomy writer fails, and the two directories.
    // Resources are closed in reverse declaration order.
    try (Directory indexDirectory = FSDirectory.open(this.INDEX_LOCATION);
         Directory facetDirectory = FSDirectory.open(this.FACET_LOCATION);
         IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig);
         TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(facetDirectory)) {
        int count = 0;
        CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();
        while (codeIndexDocument != null) {
            Singleton.getLogger().info("Indexing file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
            this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());
            Document doc = this.buildDocument(codeIndexDocument);
            writer.updateDocument(new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxonomyWriter, doc));
            count++;
            // Stop polling once the batch limit is reached; the rest stays queued.
            codeIndexDocument = count >= INDEX_QUEUE_BATCH_SIZE ? null : codeIndexDocumentQueue.poll();
        }
    } finally {
        // Runs after the resources above have been closed.
        Singleton.getLogger().info("Closing writers");
    }
}
Also used : DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) CodeAnalyzer(com.searchcode.app.util.CodeAnalyzer) TaxonomyWriter(org.apache.lucene.facet.taxonomy.TaxonomyWriter) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) FacetsConfig(org.apache.lucene.facet.FacetsConfig) IndexWriter(org.apache.lucene.index.IndexWriter) CodeIndexDocument(com.searchcode.app.dto.CodeIndexDocument) Term(org.apache.lucene.index.Term) CodeAnalyzer(com.searchcode.app.util.CodeAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) CodeIndexDocument(com.searchcode.app.dto.CodeIndexDocument) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 84 with DirectoryTaxonomyWriter

use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project orientdb by orientechnologies.

the class LuceneNativeFacet method index.

/**
 * Build the example index: five documents, each with an "Author" facet and a
 * three-level "Publish Date" facet path.
 *
 * @throws IOException if opening, writing to, or closing either writer fails
 */
private void index() throws IOException {
    // One row per document: author followed by the three "Publish Date" path components.
    final String[][] entries = {
        {"Bob", "2010", "10", "15"},
        {"Lisa", "2010", "10", "20"},
        {"Lisa", "2012", "1", "1"},
        {"Susan", "2012", "1", "7"},
        {"Frank", "1999", "5", "5"},
    };
    // try-with-resources closes both writers even when building or adding a
    // document throws. Resources are closed in reverse declaration order, so
    // the taxonomy writer is closed before the index writer.
    try (IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));
         // Writes facet ords to a separate directory from the main index
         DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir)) {
        for (String[] entry : entries) {
            Document doc = new Document();
            doc.add(new FacetField("Author", entry[0]));
            doc.add(new FacetField("Publish Date", entry[1], entry[2], entry[3]));
            indexWriter.addDocument(config.build(taxoWriter, doc));
        }
    }
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) IndexWriter(org.apache.lucene.index.IndexWriter) FacetField(org.apache.lucene.facet.FacetField) Document(org.apache.lucene.document.Document) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 85 with DirectoryTaxonomyWriter

use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project searchcode-server by boyter.

the class IndexService method indexDocument.

/**
 * Given a queue of documents to index, index them by popping the queue supplied.
 * The queue is drained into a list first and the list is then indexed through a parallel stream;
 * a failure on one document is logged and does not stop the remaining documents.
 * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
 * index.
 *
 * @param codeIndexDocumentQueue queue that documents are polled from until it is empty
 * @throws IOException if a directory or writer cannot be opened, or a directory cannot be closed
 */
@Override
public synchronized void indexDocument(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    // The directories are closed after the writers below have been closed.
    try (Directory indexDirectory = FSDirectory.open(this.INDEX_WRITE_LOCATION);
         Directory facetDirectory = FSDirectory.open(this.FACET_WRITE_LOCATION)) {
        IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig);
        final TaxonomyWriter taxonomyWriter;
        try {
            taxonomyWriter = new DirectoryTaxonomyWriter(facetDirectory);
        } catch (IOException | RuntimeException ex) {
            // Do not leave the index writer open when the taxonomy writer cannot be created.
            this.helpers.closeQuietly(writer);
            throw ex;
        }

        try {
            List<CodeIndexDocument> codeIndexDocumentList = new ArrayList<>();
            for (CodeIndexDocument next = codeIndexDocumentQueue.poll(); next != null; next = codeIndexDocumentQueue.poll()) {
                codeIndexDocumentList.add(next);
            }

            codeIndexDocumentList.parallelStream().forEach(x -> {
                this.logger.info("843fb34b::indexing file " + x.getRepoLocationRepoNameLocationFilename());
                this.decrementCodeIndexLinesCount(x.getLines());
                // Kept per document: each parallel worker gets its own configuration instance.
                FacetsConfig facetsConfig = new FacetsConfig();
                facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
                facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
                facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);
                facetsConfig.setIndexFieldName(Values.SOURCE, Values.SOURCE);
                Document document = this.buildDocument(x);
                try {
                    writer.updateDocument(new Term(Values.PATH, x.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxonomyWriter, document));
                } catch (Exception ex) {
                    // Best effort: log and carry on with the other documents.
                    this.logger.severe(String.format("b824ed70::error in class %s exception %s", ex.getClass(), ex.getMessage()));
                }
            });
        } finally {
            this.helpers.closeQuietly(writer);
            this.helpers.closeQuietly(taxonomyWriter);
            this.logger.info("f32cef3e::closing writers");
        }
    }
}
Also used : Analyzer(org.apache.lucene.analysis.Analyzer) ParseException(org.apache.lucene.queryparser.classic.ParseException) NoSuchFileException(java.nio.file.NoSuchFileException) IOException(java.io.IOException) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) TaxonomyWriter(org.apache.lucene.facet.taxonomy.TaxonomyWriter) DirectoryTaxonomyWriter(org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter) Directory(org.apache.lucene.store.Directory) FSDirectory(org.apache.lucene.store.FSDirectory)

Aggregations

DirectoryTaxonomyWriter (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter)85 Directory (org.apache.lucene.store.Directory)73 DirectoryTaxonomyReader (org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader)52 Document (org.apache.lucene.document.Document)46 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)45 FacetsConfig (org.apache.lucene.facet.FacetsConfig)35 FacetField (org.apache.lucene.facet.FacetField)31 Test (org.junit.Test)28 IndexSearcher (org.apache.lucene.search.IndexSearcher)27 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)26 IndexWriter (org.apache.lucene.index.IndexWriter)25 Facets (org.apache.lucene.facet.Facets)22 SlowRAMDirectory (org.apache.lucene.facet.SlowRAMDirectory)21 FacetsCollector (org.apache.lucene.facet.FacetsCollector)17 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)15 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)15 FacetResult (org.apache.lucene.facet.FacetResult)14 TaxonomyReader (org.apache.lucene.facet.taxonomy.TaxonomyReader)13 DirectoryReader (org.apache.lucene.index.DirectoryReader)12 TaxonomyWriter (org.apache.lucene.facet.taxonomy.TaxonomyWriter)9