Use of org.apache.lucene.facet.FacetsConfig in project lucene-solr by apache.
The class TestTaxonomyFacetCounts, method testManyFacetsInOneDocument.
// LUCENE-4583: make sure that if we require > 32 KB for one
// document, we don't hit an exception when using Facet42DocValuesFormat
public void testManyFacetsInOneDocument() throws Exception {
  assumeTrue("default Codec doesn't support huge BinaryDocValues", TestUtil.fieldSupportsHugeBinaryDocValues(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
  FacetsConfig config = new FacetsConfig();
  config.setMultiValued("dim", true);
  int numLabels = TestUtil.nextInt(random(), 40000, 100000);
  Document doc = new Document();
  doc.add(newTextField("field", "text", Field.Store.NO));
  for (int i = 0; i < numLabels; i++) {
    doc.add(new FacetField("dim", "" + i));
  }
  writer.addDocument(config.build(taxoWriter, doc));
  // NRT open
  IndexSearcher searcher = newSearcher(writer.getReader());
  // NRT open
  TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
  Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, searcher, taxoReader, config);
  FacetResult result = facets.getTopChildren(Integer.MAX_VALUE, "dim");
  assertEquals(numLabels, result.labelValues.length);
  Set<String> allLabels = new HashSet<>();
  for (LabelAndValue labelValue : result.labelValues) {
    allLabels.add(labelValue.label);
    assertEquals(1, labelValue.value.intValue());
  }
  assertEquals(numLabels, allLabels.size());
  writer.close();
  IOUtils.close(searcher.getIndexReader(), taxoWriter, taxoReader, dir, taxoDir);
}
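The getAllFacets helper is not included in the snippet above. Below is a minimal sketch of what such a helper could look like, assuming the counts are gathered with a FacetsCollector over a MatchAllDocsQuery and aggregated with FastTaxonomyFacetCounts; the helper in the actual test class may be implemented differently.

// Sketch only: collects facet counts over every document in the index.
private static Facets getAllFacets(String indexFieldName, IndexSearcher searcher,
                                   TaxonomyReader taxoReader, FacetsConfig config) throws IOException {
  FacetsCollector fc = new FacetsCollector();
  // Match all documents so every facet label contributes to the counts.
  searcher.search(new MatchAllDocsQuery(), fc);
  // FastTaxonomyFacetCounts reads the binary doc values written under indexFieldName.
  return new FastTaxonomyFacetCounts(indexFieldName, taxoReader, config, fc);
}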
Use of org.apache.lucene.facet.FacetsConfig in project lucene-solr by apache.
The class IndexAndTaxonomyReplicationClientTest, method setUp.
@Override
@Before
public void setUp() throws Exception {
  super.setUp();
  // Publish-side (source) directories and handler-side (client) directories.
  publishIndexDir = newDirectory();
  publishTaxoDir = newDirectory();
  handlerIndexDir = newMockDirectory();
  handlerTaxoDir = newMockDirectory();
  clientWorkDir = createTempDir("replicationClientTest");
  sourceDirFactory = new PerSessionDirectoryFactory(clientWorkDir);
  replicator = new LocalReplicator();
  callback = new IndexAndTaxonomyReadyCallback(handlerIndexDir, handlerTaxoDir);
  handler = new IndexAndTaxonomyReplicationHandler(handlerIndexDir, handlerTaxoDir, callback);
  client = new ReplicationClient(replicator, handler, sourceDirFactory);
  // A SnapshotDeletionPolicy is needed so revisions can hold on to index commits while they replicate.
  IndexWriterConfig conf = newIndexWriterConfig(null);
  conf.setIndexDeletionPolicy(new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy()));
  publishIndexWriter = new IndexWriter(publishIndexDir, conf);
  publishTaxoWriter = new SnapshotDirectoryTaxonomyWriter(publishTaxoDir);
  config = new FacetsConfig();
  config.setHierarchical("A", true);
}
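The setUp method only wires the components together; a typical test body would then publish a revision of the index plus taxonomy and let the client pull it into the handler directories. Below is a rough sketch of that flow, assuming the standard org.apache.lucene.replicator API (IndexAndTaxonomyRevision, LocalReplicator.publish, ReplicationClient.updateNow); the assertions of the real test are omitted.

// Sketch only: index one faceted document, publish a revision, and replicate it to the handler side.
Document doc = new Document();
doc.add(new FacetField("A", "1"));
publishIndexWriter.addDocument(config.build(publishTaxoWriter, doc));
publishIndexWriter.commit();
publishTaxoWriter.commit();

// Snapshot both writers and hand the revision to the replicator.
replicator.publish(new IndexAndTaxonomyRevision(publishIndexWriter, publishTaxoWriter));

// Pull the new revision into handlerIndexDir / handlerTaxoDir; the callback fires once it is ready.
client.updateNow();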
Use of org.apache.lucene.facet.FacetsConfig in project searchcode-server by boyter.
The class CodeIndexer, method indexDocuments.
/**
 * Given a queue of documents to index, indexes them by popping items off the queue, up to the default batch size of 1000 items.
 * This method must be synchronized because no logic has been added to deal with multiple threads writing to the
 * index.
 * TODO investigate how Lucene deals with multiple writes
 */
public synchronized void indexDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
  Directory indexDirectory = FSDirectory.open(this.INDEX_LOCATION);
  Directory facetDirectory = FSDirectory.open(this.FACET_LOCATION);
  Analyzer analyzer = new CodeAnalyzer();
  IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
  FacetsConfig facetsConfig;
  indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
  IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig);
  TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(facetDirectory);
  try {
    CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();
    int count = 0;
    while (codeIndexDocument != null) {
      Singleton.getLogger().info("Indexing file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
      this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());
      // Route each facet dimension to its own index field.
      facetsConfig = new FacetsConfig();
      facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
      facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
      facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);
      Document doc = this.buildDocument(codeIndexDocument);
      // Update by path so re-indexing the same file replaces the previous document.
      writer.updateDocument(new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxonomyWriter, doc));
      count++;
      // Stop once the batch size is reached; remaining items stay on the queue for the next run.
      if (count >= INDEX_QUEUE_BATCH_SIZE) {
        codeIndexDocument = null;
      } else {
        codeIndexDocument = codeIndexDocumentQueue.poll();
      }
    }
  } finally {
    try {
      writer.close();
    } finally {
      taxonomyWriter.close();
    }
    Singleton.getLogger().info("Closing writers");
  }
}
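The buildDocument method is not shown here. For the facet side it would need to add FacetField values whose dimensions match the setIndexFieldName calls above. Below is a hypothetical sketch of that portion only, with the CodeIndexDocument getters (getLanguageName, getRepoName, getCodeOwner) assumed for illustration; the real searchcode-server method also adds the searchable content fields.

// Sketch only: the facet-related portion of a buildDocument implementation.
private Document buildDocument(CodeIndexDocument codeIndexDocument) {
  Document document = new Document();
  // The path is used as the update key in updateDocument above.
  document.add(new StringField(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename(), Field.Store.YES));
  // Facet dimensions must match the FacetsConfig.setIndexFieldName calls.
  document.add(new FacetField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName())); // getter assumed
  document.add(new FacetField(Values.REPONAME, codeIndexDocument.getRepoName()));         // getter assumed
  document.add(new FacetField(Values.CODEOWNER, codeIndexDocument.getCodeOwner()));       // getter assumed
  // ... remaining text/content fields omitted ...
  return document;
}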