use of org.apache.lucene.facet.taxonomy.TaxonomyWriter in project lucene-solr by apache.
the class TestMultipleIndexFields method testSomeSameSomeDifferent.
@Test
public void testSomeSameSomeDifferent() throws Exception {
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
// create and open an index writer
RandomIndexWriter iw = new RandomIndexWriter(random(), indexDir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
// create and open a taxonomy writer
TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
FacetsConfig config = getConfig();
config.setIndexFieldName("Band", "$music");
config.setIndexFieldName("Composer", "$music");
config.setIndexFieldName("Author", "$literature");
seedIndex(tw, iw, config);
IndexReader ir = iw.getReader();
tw.commit();
// prepare index reader and taxonomy.
TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);
// prepare searcher to search against
IndexSearcher searcher = newSearcher(ir);
FacetsCollector sfc = performSearch(tr, ir, searcher);
Map<String, Facets> facetsMap = new HashMap<>();
Facets facets2 = getTaxonomyFacetCounts(tr, config, sfc, "$music");
facetsMap.put("Band", facets2);
facetsMap.put("Composer", facets2);
facetsMap.put("Author", getTaxonomyFacetCounts(tr, config, sfc, "$literature"));
Facets facets = new MultiFacets(facetsMap, getTaxonomyFacetCounts(tr, config, sfc));
// Obtain facets results and hand-test them
assertCorrectResults(facets);
assertOrdinalsExist("$music", ir);
assertOrdinalsExist("$literature", ir);
iw.close();
IOUtils.close(tr, ir, iw, tw, indexDir, taxoDir);
}
use of org.apache.lucene.facet.taxonomy.TaxonomyWriter in project lucene-solr by apache.
the class TestMultipleIndexFields method testCustom.
@Test
public void testCustom() throws Exception {
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
// create and open an index writer
RandomIndexWriter iw = new RandomIndexWriter(random(), indexDir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
// create and open a taxonomy writer
TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
FacetsConfig config = getConfig();
config.setIndexFieldName("Author", "$author");
seedIndex(tw, iw, config);
IndexReader ir = iw.getReader();
tw.commit();
// prepare index reader and taxonomy.
TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);
// prepare searcher to search against
IndexSearcher searcher = newSearcher(ir);
FacetsCollector sfc = performSearch(tr, ir, searcher);
Map<String, Facets> facetsMap = new HashMap<>();
facetsMap.put("Author", getTaxonomyFacetCounts(tr, config, sfc, "$author"));
Facets facets = new MultiFacets(facetsMap, getTaxonomyFacetCounts(tr, config, sfc));
// Obtain facets results and hand-test them
assertCorrectResults(facets);
assertOrdinalsExist("$facets", ir);
assertOrdinalsExist("$author", ir);
iw.close();
IOUtils.close(tr, ir, tw, indexDir, taxoDir);
}
use of org.apache.lucene.facet.taxonomy.TaxonomyWriter in project lucene-solr by apache.
the class TestDirectoryTaxonomyReader method doTestReadRecreatedTaxonomy.
private void doTestReadRecreatedTaxonomy(Random random, boolean closeReader) throws Exception {
Directory dir = null;
TaxonomyWriter tw = null;
TaxonomyReader tr = null;
// prepare a few categories
int n = 10;
FacetLabel[] cp = new FacetLabel[n];
for (int i = 0; i < n; i++) {
cp[i] = new FacetLabel("a", Integer.toString(i));
}
try {
dir = newDirectory();
tw = new DirectoryTaxonomyWriter(dir);
tw.addCategory(new FacetLabel("a"));
tw.close();
tr = new DirectoryTaxonomyReader(dir);
int baseNumCategories = tr.getSize();
for (int i = 0; i < n; i++) {
int k = random.nextInt(n);
tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
for (int j = 0; j <= k; j++) {
tw.addCategory(cp[j]);
}
tw.close();
if (closeReader) {
tr.close();
tr = new DirectoryTaxonomyReader(dir);
} else {
TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
assertNotNull(newtr);
tr.close();
tr = newtr;
}
assertEquals("Wrong #categories in taxonomy (i=" + i + ", k=" + k + ")", baseNumCategories + 1 + k, tr.getSize());
}
} finally {
IOUtils.close(tr, tw, dir);
}
}
use of org.apache.lucene.facet.taxonomy.TaxonomyWriter in project searchcode-server by boyter.
the class CodeIndexer method indexTimeDocuments.
/**
* Given a queue of documents to index, index them by popping the queue limited to 1000 items.
* This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
* index.
* TODO investigate how Lucene deals with multiple writes
*/
public synchronized void indexTimeDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
// Index all documents and commit at the end for performance gains
Directory dir = FSDirectory.open(Paths.get(Properties.getProperties().getProperty(Values.TIMEINDEXLOCATION, Values.DEFAULTTIMEINDEXLOCATION)));
Directory facetsdir = FSDirectory.open(Paths.get(Properties.getProperties().getProperty(Values.TIMEINDEXFACETLOCATION, Values.DEFAULTTIMEINDEXFACETLOCATION)));
Analyzer analyzer = new CodeAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
FacetsConfig facetsConfig;
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
IndexWriter writer = new IndexWriter(dir, iwc);
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(facetsdir);
try {
CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();
int count = 0;
while (codeIndexDocument != null) {
Singleton.getLogger().info("Indexing time file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());
Document doc = new Document();
// Path is the primary key for documents
// needs to include repo location, project name and then filepath including file and revision
Field pathField = new StringField("path", codeIndexDocument.getRepoLocationRepoNameLocationFilename() + ":" + codeIndexDocument.getRevision(), Field.Store.YES);
doc.add(pathField);
// Add in facets
facetsConfig = new FacetsConfig();
facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);
facetsConfig.setIndexFieldName(Values.DATEYEARMONTHDAY, Values.DATEYEARMONTHDAY);
facetsConfig.setIndexFieldName(Values.DATEYEARMONTH, Values.DATEYEARMONTH);
facetsConfig.setIndexFieldName(Values.DATEYEAR, Values.DATEYEAR);
facetsConfig.setIndexFieldName(Values.REVISION, Values.REVISION);
facetsConfig.setIndexFieldName(Values.DELETED, Values.DELETED);
if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getLanguageName()) == false) {
doc.add(new SortedSetDocValuesFacetField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName()));
}
if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRepoName()) == false) {
doc.add(new SortedSetDocValuesFacetField(Values.REPONAME, codeIndexDocument.getRepoName()));
}
if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getCodeOwner()) == false) {
doc.add(new SortedSetDocValuesFacetField(Values.CODEOWNER, codeIndexDocument.getCodeOwner()));
}
if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay()));
}
if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonthDay().substring(0, 6)));
}
if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
doc.add(new SortedSetDocValuesFacetField(Values.DATEYEAR, codeIndexDocument.getYearMonthDay().substring(0, 4)));
}
if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRevision()) == false) {
doc.add(new SortedSetDocValuesFacetField(Values.REVISION, codeIndexDocument.getRevision()));
}
if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.isDeleted()) == false) {
doc.add(new SortedSetDocValuesFacetField(Values.DELETED, codeIndexDocument.isDeleted()));
}
String indexContents = Values.EMPTYSTRING;
indexContents += this.searchcodeLib.splitKeywords(codeIndexDocument.getContents());
indexContents += this.searchcodeLib.codeCleanPipeline(codeIndexDocument.getContents());
// Store in spelling corrector
this.searchcodeLib.addToSpellingCorrector(codeIndexDocument.getContents());
indexContents = indexContents.toLowerCase();
doc.add(new TextField(Values.REPONAME, codeIndexDocument.getRepoName(), Field.Store.YES));
doc.add(new TextField(Values.FILENAME, codeIndexDocument.getFileName(), Field.Store.YES));
doc.add(new TextField(Values.FILELOCATION, codeIndexDocument.getFileLocation(), Field.Store.YES));
doc.add(new TextField(Values.FILELOCATIONFILENAME, codeIndexDocument.getFileLocationFilename(), Field.Store.YES));
doc.add(new TextField(Values.MD5HASH, codeIndexDocument.getMd5hash(), Field.Store.YES));
doc.add(new TextField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName(), Field.Store.YES));
doc.add(new IntField(Values.CODELINES, codeIndexDocument.getCodeLines(), Field.Store.YES));
doc.add(new TextField(Values.CONTENTS, indexContents, Field.Store.NO));
doc.add(new TextField(Values.REPOLOCATION, codeIndexDocument.getRepoRemoteLocation(), Field.Store.YES));
doc.add(new TextField(Values.CODEOWNER, codeIndexDocument.getCodeOwner(), Field.Store.YES));
doc.add(new TextField(Values.REVISION, codeIndexDocument.getRevision(), Field.Store.YES));
doc.add(new TextField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay(), Field.Store.YES));
doc.add(new TextField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonth(), Field.Store.YES));
doc.add(new TextField(Values.DATEYEAR, codeIndexDocument.getYear(), Field.Store.YES));
doc.add(new TextField(Values.MESSAGE, codeIndexDocument.getMessage(), Field.Store.YES));
doc.add(new TextField(Values.DELETED, codeIndexDocument.isDeleted(), Field.Store.YES));
// Extra metadata in this case when it was last indexed
doc.add(new LongField(Values.MODIFIED, new Date().getTime(), Field.Store.YES));
writer.updateDocument(new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxoWriter, doc));
count++;
if (count >= INDEX_QUEUE_BATCH_SIZE) {
codeIndexDocument = null;
} else {
codeIndexDocument = codeIndexDocumentQueue.poll();
}
}
} finally {
Singleton.getLogger().info("Closing writers");
writer.close();
taxoWriter.close();
}
}
use of org.apache.lucene.facet.taxonomy.TaxonomyWriter in project lucene-solr by apache.
the class TestMultipleIndexFields method testTwoCustomsSameField.
@Test
public void testTwoCustomsSameField() throws Exception {
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
// create and open an index writer
RandomIndexWriter iw = new RandomIndexWriter(random(), indexDir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
// create and open a taxonomy writer
TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
FacetsConfig config = getConfig();
config.setIndexFieldName("Band", "$music");
config.setIndexFieldName("Composer", "$music");
seedIndex(tw, iw, config);
IndexReader ir = iw.getReader();
tw.commit();
// prepare index reader and taxonomy.
TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);
// prepare searcher to search against
IndexSearcher searcher = newSearcher(ir);
FacetsCollector sfc = performSearch(tr, ir, searcher);
Map<String, Facets> facetsMap = new HashMap<>();
Facets facets2 = getTaxonomyFacetCounts(tr, config, sfc, "$music");
facetsMap.put("Band", facets2);
facetsMap.put("Composer", facets2);
Facets facets = new MultiFacets(facetsMap, getTaxonomyFacetCounts(tr, config, sfc));
// Obtain facets results and hand-test them
assertCorrectResults(facets);
assertOrdinalsExist("$facets", ir);
assertOrdinalsExist("$music", ir);
assertOrdinalsExist("$music", ir);
iw.close();
IOUtils.close(tr, ir, tw, indexDir, taxoDir);
}
Aggregations