use of org.apache.lucene.facet.FacetsConfig in project lucene-solr by apache.
the class TestTaxonomyFacetSumValueSource method testCountAndSumScore.
/**
 * Indexes a random number of identical documents, each tagged with facet "a" and facet "b"
 * (dimension "b" is redirected to the "$b" index field), then asserts that the facet counts
 * for "a" and the score-based sums for "b" both equal {@code maxDoc()} of the reader.
 */
public void testCountAndSumScore() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  IndexWriter indexWriter =
      new IndexWriter(indexDir, newIndexWriterConfig(new MockAnalyzer(random())));

  FacetsConfig config = new FacetsConfig();
  config.setIndexFieldName("b", "$b");

  // Every document is identical: one plain string field plus one facet per dimension.
  int docCount = atLeast(30);
  for (int n = 0; n < docCount; n++) {
    Document doc = new Document();
    doc.add(new StringField("f", "v", Field.Store.NO));
    doc.add(new FacetField("a", "1"));
    doc.add(new FacetField("b", "1"));
    indexWriter.addDocument(config.build(taxoWriter, doc));
  }

  DirectoryReader reader = DirectoryReader.open(indexWriter);
  DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

  // The collector is created with 'true' so that scores are available to the sum below.
  FacetsCollector collector = new FacetsCollector(true);
  FacetsCollector.search(newSearcher(reader), new MatchAllDocsQuery(), 10, collector);

  Facets countFacets = getTaxonomyFacetCounts(taxoReader, config, collector);
  Facets scoreSumFacets =
      new TaxonomyFacetSumValueSource(
          new DocValuesOrdinalsReader("$b"),
          taxoReader,
          config,
          collector,
          DoubleValuesSource.SCORES);

  int countForA = countFacets.getTopChildren(10, "a").value.intValue();
  assertEquals(reader.maxDoc(), countForA);

  double sumForB = scoreSumFacets.getTopChildren(10, "b").value.doubleValue();
  assertEquals(reader.maxDoc(), sumForB, 1E-10);

  indexWriter.close();
  IOUtils.close(taxoWriter, taxoReader, taxoDir, reader, indexDir);
}
use of org.apache.lucene.facet.FacetsConfig in project lucene-solr by apache.
the class TestTaxonomyFacetCounts2 method getConfig.
/**
 * Creates the facet configuration used by this test class: dimensions "A" and "B" are
 * multi-valued, "B" additionally requires a dimension count, and "D" is hierarchical.
 *
 * @return a freshly configured {@link FacetsConfig}
 */
private static FacetsConfig getConfig() {
  final FacetsConfig facetsConfig = new FacetsConfig();

  // Multi-valued dimensions.
  facetsConfig.setMultiValued("A", true);
  facetsConfig.setMultiValued("B", true);

  // "B" also needs its dimension count.
  facetsConfig.setRequireDimCount("B", true);

  // Hierarchical dimension.
  facetsConfig.setHierarchical("D", true);

  return facetsConfig;
}
use of org.apache.lucene.facet.FacetsConfig in project lucene-solr by apache.
the class IndexAndTaxonomyRevisionTest method newDocument.
/**
 * Builds a document carrying the single facet "A"/"1", processed through a default
 * {@link FacetsConfig} against the given taxonomy writer.
 *
 * @param taxoWriter taxonomy writer handed to {@code FacetsConfig.build}
 * @return the document returned by {@code FacetsConfig.build}
 * @throws IOException if building the facet document fails
 */
private Document newDocument(TaxonomyWriter taxoWriter) throws IOException {
  FacetsConfig facetsConfig = new FacetsConfig();
  Document rawDoc = new Document();
  FacetField facet = new FacetField("A", "1");
  rawDoc.add(facet);
  return facetsConfig.build(taxoWriter, rawDoc);
}
use of org.apache.lucene.facet.FacetsConfig in project searchcode-server by boyter.
the class CodeIndexer method indexTimeDocuments.
/**
 * Indexes queued documents into the time index, polling the queue until it is empty or
 * INDEX_QUEUE_BATCH_SIZE documents have been written during this call.
 * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
 * index.
 * TODO investigate how Lucene deals with multiple writes
 *
 * @param codeIndexDocumentQueue queue of documents to index; every polled entry is removed from it
 * @throws IOException if opening, writing to, or closing the index or the taxonomy fails
 */
public synchronized void indexTimeDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    // The facet configuration is identical for every document, so build it once per call.
    FacetsConfig facetsConfig = newTimeIndexFacetsConfig();

    // try-with-resources closes both writers and both directories (in reverse order) even when
    // opening a later resource or closing an earlier one throws.
    try (Directory dir = FSDirectory.open(Paths.get(Properties.getProperties().getProperty(Values.TIMEINDEXLOCATION, Values.DEFAULTTIMEINDEXLOCATION)));
         Directory facetsdir = FSDirectory.open(Paths.get(Properties.getProperties().getProperty(Values.TIMEINDEXFACETLOCATION, Values.DEFAULTTIMEINDEXFACETLOCATION)));
         IndexWriter writer = new IndexWriter(dir, iwc);
         TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(facetsdir)) {
        try {
            // Index all documents and commit at the end for performance gains
            int count = 0;
            CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();

            while (codeIndexDocument != null) {
                Singleton.getLogger().info("Indexing time file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
                this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());

                // Path is the primary key for documents
                // needs to include repo location, project name and then filepath including file and revision
                String primaryKey = codeIndexDocument.getRepoLocationRepoNameLocationFilename() + ":" + codeIndexDocument.getRevision();
                Document doc = buildTimeIndexDocument(codeIndexDocument, primaryKey);

                // The delete term must carry the same key that is stored in the path field;
                // a term without the revision suffix can never match an existing document.
                // NOTE(review): assumes Values.PATH names the same field as the "path" literal — confirm.
                writer.updateDocument(new Term(Values.PATH, primaryKey), facetsConfig.build(taxoWriter, doc));

                count++;
                // Stop after one batch; remaining entries stay queued for a later call.
                codeIndexDocument = count >= INDEX_QUEUE_BATCH_SIZE ? null : codeIndexDocumentQueue.poll();
            }
        } finally {
            Singleton.getLogger().info("Closing writers");
        }
    }
}

/** Creates the facet configuration mapping every time-index facet dimension to its own index field. */
private FacetsConfig newTimeIndexFacetsConfig() {
    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
    facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
    facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);
    facetsConfig.setIndexFieldName(Values.DATEYEARMONTHDAY, Values.DATEYEARMONTHDAY);
    facetsConfig.setIndexFieldName(Values.DATEYEARMONTH, Values.DATEYEARMONTH);
    facetsConfig.setIndexFieldName(Values.DATEYEAR, Values.DATEYEAR);
    facetsConfig.setIndexFieldName(Values.REVISION, Values.REVISION);
    facetsConfig.setIndexFieldName(Values.DELETED, Values.DELETED);
    return facetsConfig;
}

/** Adds a facet field for the dimension unless the value is null, empty or whitespace. */
private void addFacetIfPresent(Document doc, String dimension, String value) {
    if (!Singleton.getHelpers().isNullEmptyOrWhitespace(value)) {
        doc.add(new SortedSetDocValuesFacetField(dimension, value));
    }
}

/** Builds the Lucene document (primary key, facets, searchable and stored fields) for one queued entry. */
private Document buildTimeIndexDocument(CodeIndexDocument codeIndexDocument, String primaryKey) {
    Document doc = new Document();
    doc.add(new StringField("path", primaryKey, Field.Store.YES));

    // Add in facets
    addFacetIfPresent(doc, Values.LANGUAGENAME, codeIndexDocument.getLanguageName());
    addFacetIfPresent(doc, Values.REPONAME, codeIndexDocument.getRepoName());
    addFacetIfPresent(doc, Values.CODEOWNER, codeIndexDocument.getCodeOwner());

    // The month and year facets are prefixes of the year-month-day value, so all three share one guard.
    String yearMonthDay = codeIndexDocument.getYearMonthDay();
    if (!Singleton.getHelpers().isNullEmptyOrWhitespace(yearMonthDay)) {
        doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTHDAY, yearMonthDay));
        doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTH, yearMonthDay.substring(0, 6)));
        doc.add(new SortedSetDocValuesFacetField(Values.DATEYEAR, yearMonthDay.substring(0, 4)));
    }

    addFacetIfPresent(doc, Values.REVISION, codeIndexDocument.getRevision());
    addFacetIfPresent(doc, Values.DELETED, codeIndexDocument.isDeleted());

    String contents = codeIndexDocument.getContents();
    String indexContents = Values.EMPTYSTRING;
    indexContents += this.searchcodeLib.splitKeywords(contents);
    indexContents += this.searchcodeLib.codeCleanPipeline(contents);
    // Store in spelling corrector
    this.searchcodeLib.addToSpellingCorrector(contents);
    indexContents = indexContents.toLowerCase();

    doc.add(new TextField(Values.REPONAME, codeIndexDocument.getRepoName(), Field.Store.YES));
    doc.add(new TextField(Values.FILENAME, codeIndexDocument.getFileName(), Field.Store.YES));
    doc.add(new TextField(Values.FILELOCATION, codeIndexDocument.getFileLocation(), Field.Store.YES));
    doc.add(new TextField(Values.FILELOCATIONFILENAME, codeIndexDocument.getFileLocationFilename(), Field.Store.YES));
    doc.add(new TextField(Values.MD5HASH, codeIndexDocument.getMd5hash(), Field.Store.YES));
    doc.add(new TextField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName(), Field.Store.YES));
    doc.add(new IntField(Values.CODELINES, codeIndexDocument.getCodeLines(), Field.Store.YES));
    doc.add(new TextField(Values.CONTENTS, indexContents, Field.Store.NO));
    doc.add(new TextField(Values.REPOLOCATION, codeIndexDocument.getRepoRemoteLocation(), Field.Store.YES));
    doc.add(new TextField(Values.CODEOWNER, codeIndexDocument.getCodeOwner(), Field.Store.YES));
    doc.add(new TextField(Values.REVISION, codeIndexDocument.getRevision(), Field.Store.YES));
    doc.add(new TextField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay(), Field.Store.YES));
    doc.add(new TextField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonth(), Field.Store.YES));
    doc.add(new TextField(Values.DATEYEAR, codeIndexDocument.getYear(), Field.Store.YES));
    doc.add(new TextField(Values.MESSAGE, codeIndexDocument.getMessage(), Field.Store.YES));
    doc.add(new TextField(Values.DELETED, codeIndexDocument.isDeleted(), Field.Store.YES));

    // Extra metadata in this case when it was last indexed
    doc.add(new LongField(Values.MODIFIED, new Date().getTime(), Field.Store.YES));
    return doc;
}
use of org.apache.lucene.facet.FacetsConfig in project lucene-solr by apache.
the class TestConcurrentFacetedIndexing method testConcurrency.
/**
 * Adds faceted documents from several threads at once, recording every category prefix that
 * was indexed, then checks that the taxonomy holds exactly those categories (plus the root)
 * and that each category's recorded parent ordinal matches its path prefix.
 */
public void testConcurrency() throws Exception {
  final AtomicInteger numDocs = new AtomicInteger(atLeast(10000));
  final Directory indexDir = newDirectory();
  final Directory taxoDir = newDirectory();
  // Used as a concurrent set: each indexed category prefix is stored as key and value.
  final ConcurrentHashMap<String, String> values = new ConcurrentHashMap<>();
  final IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(null));
  final DirectoryTaxonomyWriter tw =
      new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, newTaxoWriterCache(numDocs.get()));
  final Thread[] indexThreads = new Thread[atLeast(4)];

  final FacetsConfig config = new FacetsConfig();
  for (int dim = 0; dim < 10; dim++) {
    String dimName = "l1." + dim;
    config.setHierarchical(dimName, true);
    config.setMultiValued(dimName, true);
  }

  for (int t = 0; t < indexThreads.length; t++) {
    indexThreads[t] = new Thread() {
      @Override
      public void run() {
        Random random = random();
        while (numDocs.decrementAndGet() > 0) {
          try {
            Document doc = new Document();
            // 1-3
            int numCats = random.nextInt(3) + 1;
            for (int c = 0; c < numCats; c++) {
              FacetField ff = newCategory();
              doc.add(ff);
              FacetLabel label = new FacetLabel(ff.dim, ff.path);
              // add all prefixes to values
              for (int level = label.length; level > 0; level--) {
                String prefix = FacetsConfig.pathToString(label.components, level);
                values.put(prefix, prefix);
              }
            }
            iw.addDocument(config.build(tw, doc));
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
        }
      }
    };
  }

  for (int t = 0; t < indexThreads.length; t++) {
    indexThreads[t].start();
  }
  for (int t = 0; t < indexThreads.length; t++) {
    indexThreads[t].join();
  }

  DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(tw);

  // +1 for root category
  int expectedSize = values.size() + 1;
  if (expectedSize != tr.getSize()) {
    // Report every category missing from the taxonomy before failing.
    for (String value : values.keySet()) {
      FacetLabel label = new FacetLabel(FacetsConfig.stringToPath(value));
      boolean missing = tr.getOrdinal(label) == -1;
      if (missing) {
        System.out.println("FAIL: path=" + label + " not recognized");
      }
    }
    fail("mismatch number of categories");
  }

  int[] parents = tr.getParallelTaxonomyArrays().parents();
  for (String cat : values.keySet()) {
    FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat));
    assertTrue("category not found " + cp, tr.getOrdinal(cp) > 0);

    // for root, parent is always virtual ROOT (ord=0)
    int parentOrd = 0;
    for (int depth = 1; depth <= cp.length; depth++) {
      FacetLabel prefix = cp.subpath(depth);
      int ord = tr.getOrdinal(prefix);
      assertEquals("invalid parent for cp=" + prefix, parentOrd, parents[ord]);
      // next level should have this parent
      parentOrd = ord;
    }
  }

  iw.close();
  IOUtils.close(tw, tr, taxoDir, indexDir);
}
Aggregations