Use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by apache.
Class CreateTaxonomyIndexTask, method doLogic.
/**
 * Creates a new taxonomy writer for the current run and registers it on the run data.
 *
 * <p>The writer is opened on the run's taxonomy directory with {@code OpenMode.CREATE}.
 *
 * @return always {@code 1}
 * @throws IOException propagated from opening the taxonomy writer
 */
@Override
public int doLogic() throws IOException {
    final PerfRunData data = getRunData();
    final DirectoryTaxonomyWriter freshTaxonomyWriter =
            new DirectoryTaxonomyWriter(data.getTaxonomyDir(), OpenMode.CREATE);
    data.setTaxonomyWriter(freshTaxonomyWriter);
    return 1;
}
Use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project lucene-solr by apache.
Class AssociationsFacetsExample, method index.
/**
 * Build the example index.
 *
 * <p>Indexes two documents carrying int associations on the "tags" dimension and float
 * associations on the "genre" dimension. Both writers are managed by try-with-resources so
 * they are closed even when building or adding a document fails.
 *
 * @throws IOException if opening, writing to, or closing either writer fails
 */
private void index() throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE);
    // Resources are closed in reverse declaration order, so the index writer is still
    // closed before the taxonomy writer, exactly as before.
    try (
        // Writes facet ords to a separate directory from the main index
        DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
        IndexWriter indexWriter = new IndexWriter(indexDir, iwc)
    ) {
        Document doc = new Document();
        // 3 occurrences for tag 'lucene'
        doc.add(new IntAssociationFacetField(3, "tags", "lucene"));
        // 87% confidence level of genre 'computing'
        doc.add(new FloatAssociationFacetField(0.87f, "genre", "computing"));
        indexWriter.addDocument(config.build(taxoWriter, doc));

        doc = new Document();
        // 1 occurrence for tag 'lucene'
        doc.add(new IntAssociationFacetField(1, "tags", "lucene"));
        // 2 occurrences for tag 'solr'
        doc.add(new IntAssociationFacetField(2, "tags", "solr"));
        // 75% confidence level of genre 'computing'
        doc.add(new FloatAssociationFacetField(0.75f, "genre", "computing"));
        // 34% confidence level of genre 'software'
        doc.add(new FloatAssociationFacetField(0.34f, "genre", "software"));
        indexWriter.addDocument(config.build(taxoWriter, doc));
    }
}
Use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project searchcode-server by boyter.
Class CodeIndexer, method indexDocuments.
/**
 * Given a queue of documents to index, index them by popping the queue, stopping once
 * {@code INDEX_QUEUE_BATCH_SIZE} documents have been processed in this call or the queue is empty.
 * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
 * index.
 *
 * <p>The directories and both writers are opened with try-with-resources, so every one of them is
 * closed when the method exits, including when opening a later resource or indexing a document fails.
 *
 * TODO investigate how Lucene deals with multiple writes
 *
 * @param codeIndexDocumentQueue queue the documents to index are polled from
 * @throws IOException if opening, writing to, or closing the index or facet directory fails
 */
public synchronized void indexDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    // The facet configuration is identical for every document, so build it once per batch.
    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
    facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
    facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);

    // Resources are closed in reverse declaration order: index writer, taxonomy writer,
    // then the two directories. The writers therefore close in the same order as before.
    try (Directory indexDirectory = FSDirectory.open(this.INDEX_LOCATION);
         Directory facetDirectory = FSDirectory.open(this.FACET_LOCATION);
         TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(facetDirectory);
         IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig)) {
        int count = 0;
        CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();
        while (codeIndexDocument != null) {
            Singleton.getLogger().info("Indexing file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
            this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());

            Document doc = this.buildDocument(codeIndexDocument);
            // Keyed on the path term so re-indexing a file replaces its previous document.
            writer.updateDocument(new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxonomyWriter, doc));

            count++;
            // Stop polling once the batch limit is reached; remaining items stay queued.
            codeIndexDocument = count >= INDEX_QUEUE_BATCH_SIZE ? null : codeIndexDocumentQueue.poll();
        }
    } finally {
        // Runs after the resources above have been closed.
        Singleton.getLogger().info("Closing writers");
    }
}
Use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project orientdb by orientechnologies.
Class LuceneNativeFacet, method index.
/**
 * Build the example index.
 *
 * <p>Indexes five documents, each with an "Author" facet and a three-level "Publish Date"
 * facet. Both writers are managed by try-with-resources so they are closed even when
 * building or adding a document fails.
 *
 * @throws IOException if opening, writing to, or closing either writer fails
 */
private void index() throws IOException {
    IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE);
    // Resources are closed in reverse declaration order, so the index writer is still
    // closed before the taxonomy writer, exactly as before.
    try (
        // Writes facet ords to a separate directory from the main index
        DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
        IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig)
    ) {
        Document doc = new Document();
        doc.add(new FacetField("Author", "Bob"));
        doc.add(new FacetField("Publish Date", "2010", "10", "15"));
        indexWriter.addDocument(config.build(taxoWriter, doc));

        doc = new Document();
        doc.add(new FacetField("Author", "Lisa"));
        doc.add(new FacetField("Publish Date", "2010", "10", "20"));
        indexWriter.addDocument(config.build(taxoWriter, doc));

        doc = new Document();
        doc.add(new FacetField("Author", "Lisa"));
        doc.add(new FacetField("Publish Date", "2012", "1", "1"));
        indexWriter.addDocument(config.build(taxoWriter, doc));

        doc = new Document();
        doc.add(new FacetField("Author", "Susan"));
        doc.add(new FacetField("Publish Date", "2012", "1", "7"));
        indexWriter.addDocument(config.build(taxoWriter, doc));

        doc = new Document();
        doc.add(new FacetField("Author", "Frank"));
        doc.add(new FacetField("Publish Date", "1999", "5", "5"));
        indexWriter.addDocument(config.build(taxoWriter, doc));
    }
}
Use of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter in project searchcode-server by boyter.
Class IndexService, method indexDocument.
/**
 * Given a queue of documents to index, index them by popping the queue supplied.
 * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
 * index.
 *
 * <p>The queue is drained into a list, which is then indexed through a parallel stream. A failure
 * on a single document is logged and does not stop the remaining documents from being processed.
 * The writers are closed quietly when indexing finishes, and both directories are closed on exit.
 *
 * @param codeIndexDocumentQueue queue the documents to index are polled from until it is empty
 * @throws IOException if a directory or writer cannot be opened, or a directory cannot be closed
 */
@Override
public synchronized void indexDocument(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    // The directories were previously never closed; try-with-resources releases them on every path.
    try (Directory indexDirectory = FSDirectory.open(this.INDEX_WRITE_LOCATION);
         Directory facetDirectory = FSDirectory.open(this.FACET_WRITE_LOCATION)) {
        final IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig);
        final TaxonomyWriter taxonomyWriter;
        try {
            taxonomyWriter = new DirectoryTaxonomyWriter(facetDirectory);
        } catch (IOException | RuntimeException ex) {
            // Do not leave the index writer open if the taxonomy writer cannot be opened.
            this.helpers.closeQuietly(writer);
            throw ex;
        }

        try {
            // Drain only after both writers are open, so a failure to open them leaves the
            // queue untouched; doing it inside the try means the writers are closed whatever happens.
            List<CodeIndexDocument> codeIndexDocumentList = new ArrayList<>();
            for (CodeIndexDocument queued = codeIndexDocumentQueue.poll();
                 queued != null;
                 queued = codeIndexDocumentQueue.poll()) {
                codeIndexDocumentList.add(queued);
            }

            codeIndexDocumentList.parallelStream().forEach(x -> {
                this.logger.info("843fb34b::indexing file " + x.getRepoLocationRepoNameLocationFilename());
                this.decrementCodeIndexLinesCount(x.getLines());

                FacetsConfig facetsConfig = new FacetsConfig();
                facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
                facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
                facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);
                facetsConfig.setIndexFieldName(Values.SOURCE, Values.SOURCE);

                Document document = this.buildDocument(x);
                try {
                    // Keyed on the path term so re-indexing a file replaces its previous document.
                    writer.updateDocument(new Term(Values.PATH, x.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxonomyWriter, document));
                } catch (Exception ex) {
                    // Best effort: one bad document must not abort the rest of the batch.
                    this.logger.severe(String.format("b824ed70::error in class %s exception %s", ex.getClass(), ex.getMessage()));
                }
            });
        } finally {
            this.helpers.closeQuietly(writer);
            this.helpers.closeQuietly(taxonomyWriter);
            this.logger.info("f32cef3e::closing writers");
        }
    }
}
Aggregations