Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
The class TestOrdValues, method addDoc.
private static void addDoc(RandomIndexWriter iw, int i) throws Exception {
  Document d = new Document();
  Field f;
  int scoreAndID = i + 1;
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setTokenized(false);
  customType.setOmitNorms(true);
  // for debug purposes
  f = newField(ID_FIELD, id2String(scoreAndID), customType);
  d.add(f);
  d.add(new SortedDocValuesField(ID_FIELD, new BytesRef(id2String(scoreAndID))));
  FieldType customType2 = new FieldType(TextField.TYPE_NOT_STORED);
  customType2.setOmitNorms(true);
  // for regular search
  f = newField(TEXT_FIELD, "text of doc" + scoreAndID + textLine(i), customType2);
  d.add(f);
  // for function scoring
  f = new LegacyIntField(INT_FIELD, scoreAndID, Store.YES);
  d.add(f);
  d.add(new NumericDocValuesField(INT_FIELD, scoreAndID));
  // for function scoring
  f = new LegacyFloatField(FLOAT_FIELD, scoreAndID, Store.YES);
  d.add(f);
  d.add(new NumericDocValuesField(FLOAT_FIELD, Float.floatToRawIntBits(scoreAndID)));
  iw.addDocument(d);
  log("added: " + d);
}
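The core pattern above is copying a predefined FieldType (TextField.TYPE_STORED) and flipping individual flags before indexing. Below is a minimal, self-contained sketch of that pattern, separate from the test: it builds a stored, untokenized, norms-free ID field plus doc-values fields for sorting and function queries. It assumes a Lucene 6.x/7.x classpath; the class name, field names, and values (CustomFieldTypeSketch, "id", "score", "0000017") are made up for illustration.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class CustomFieldTypeSketch {
  public static void main(String[] args) throws Exception {
    // Start from TextField.TYPE_STORED and turn it into a keyword-style ID field:
    // stored, indexed as a single untokenized term, no norms.
    FieldType idType = new FieldType(TextField.TYPE_STORED);
    idType.setTokenized(false);
    idType.setOmitNorms(true);
    idType.freeze();  // optional; prevents further flag changes

    Document d = new Document();
    d.add(new Field("id", "0000017", idType));
    // doc values alongside the indexed field, for sorting / function queries
    d.add(new SortedDocValuesField("id", new BytesRef("0000017")));
    d.add(new NumericDocValuesField("score", 17L));

    Directory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    w.addDocument(d);
    w.close();
    dir.close();
  }
}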
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
The class PreAnalyzedUpdateProcessor, method mutate.
@Override
protected SolrInputField mutate(SolrInputField src) {
  SchemaField sf = schema.getFieldOrNull(src.getName());
  if (sf == null) {
    // remove this field
    return null;
  }
  FieldType type = PreAnalyzedField.createFieldType(sf);
  if (type == null) {
    // neither indexed nor stored - skip
    return null;
  }
  SolrInputField res = new SolrInputField(src.getName());
  for (Object o : src) {
    if (o == null) {
      continue;
    }
    Field pre = (Field) parser.createField(sf, o);
    if (pre != null) {
      res.addValue(pre);
    } else {
      // restore the original value
      log.warn("Could not parse field {} - using original value as is: {}", src.getName(), o);
      res.addValue(o);
    }
  }
  return res;
}
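The mutate() method drops a field when PreAnalyzedField.createFieldType(sf) returns null, i.e. when the schema field is neither indexed nor stored. The sketch below shows the same idea at the plain-Lucene FieldType level, outside of Solr's update-processor API; the class and method names are hypothetical, and it assumes a Lucene 6.x/7.x classpath.

import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;

public class SkipEmptyFieldTypeSketch {
  // A FieldType that is not stored, not indexed, and carries no doc values
  // contributes nothing to the index, so callers typically skip such fields,
  // mirroring the "type == null" branch in mutate() above.
  static boolean contributesNothing(FieldType ft) {
    return !ft.stored()
        && ft.indexOptions() == IndexOptions.NONE
        && ft.docValuesType() == DocValuesType.NONE;
  }

  public static void main(String[] args) {
    FieldType ft = new FieldType();              // every flag defaults to "off"
    System.out.println(contributesNothing(ft));  // true
    ft.setStored(true);                          // now it at least stores a value
    System.out.println(contributesNothing(ft));  // false
  }
}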
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
The class TestTeeSinkTokenFilter, method testEndOffsetPositionWithTeeSinkTokenFilter.
// LUCENE-1448
// TODO: instead of testing it this way, we can test
// with BaseTokenStreamTestCase now...
public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(analyzer));
  Document doc = new Document();
  TokenStream tokenStream = analyzer.tokenStream("field", "abcd ");
  TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenStream);
  TokenStream sink = tee.newSinkTokenStream();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorOffsets(true);
  ft.setStoreTermVectorPositions(true);
  Field f1 = new Field("field", tee, ft);
  Field f2 = new Field("field", sink, ft);
  doc.add(f1);
  doc.add(f2);
  w.addDocument(doc);
  w.close();
  IndexReader r = DirectoryReader.open(dir);
  Terms vector = r.getTermVectors(0).terms("field");
  assertEquals(1, vector.size());
  TermsEnum termsEnum = vector.iterator();
  termsEnum.next();
  assertEquals(2, termsEnum.totalTermFreq());
  PostingsEnum positions = termsEnum.postings(null, PostingsEnum.ALL);
  assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(2, positions.freq());
  positions.nextPosition();
  assertEquals(0, positions.startOffset());
  assertEquals(4, positions.endOffset());
  positions.nextPosition();
  assertEquals(8, positions.startOffset());
  assertEquals(12, positions.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, positions.nextDoc());
  r.close();
  dir.close();
  analyzer.close();
}
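The FieldType flags that matter here are the term-vector ones: setStoreTermVectors, setStoreTermVectorPositions, and setStoreTermVectorOffsets. The sketch below applies the same flags to an ordinary analyzed field (no TeeSinkTokenFilter) and reads the per-document term vector back; it is a stand-alone illustration, assuming a Lucene 6.x/7.x classpath, with a made-up class name, field name, and text.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class TermVectorSketch {
  public static void main(String[] args) throws Exception {
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorOffsets(true);
    ft.freeze();

    Directory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    Document doc = new Document();
    doc.add(new Field("body", "abcd efgh abcd", ft));
    w.addDocument(doc);
    w.close();

    DirectoryReader r = DirectoryReader.open(dir);
    Terms vector = r.getTermVectors(0).terms("body");    // term vector of doc 0
    TermsEnum te = vector.iterator();
    for (BytesRef term = te.next(); term != null; term = te.next()) {
      PostingsEnum pe = te.postings(null, PostingsEnum.ALL);
      pe.nextDoc();                                       // single pseudo-doc in a term vector
      System.out.println(term.utf8ToString() + " freq=" + pe.freq());
      for (int i = 0; i < pe.freq(); i++) {
        int pos = pe.nextPosition();
        System.out.println("  pos=" + pos
            + " offsets=" + pe.startOffset() + "-" + pe.endOffset());
      }
    }
    r.close();
    dir.close();
  }
}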
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
The class Test2BPostingsBytes, method test.
public void test() throws Exception {
  IndexWriterConfig defaultConfig = new IndexWriterConfig(null);
  Codec defaultCodec = defaultConfig.getCodec();
  if ((new IndexWriterConfig(null)).getCodec() instanceof CompressingCodec) {
    Pattern regex = Pattern.compile("maxDocsPerChunk=(\\d+), blockSize=(\\d+)");
    Matcher matcher = regex.matcher(defaultCodec.toString());
    assertTrue("Unexpected CompressingCodec toString() output: " + defaultCodec.toString(), matcher.find());
    int maxDocsPerChunk = Integer.parseInt(matcher.group(1));
    int blockSize = Integer.parseInt(matcher.group(2));
    int product = maxDocsPerChunk * blockSize;
    assumeTrue(defaultCodec.getName() + " maxDocsPerChunk (" + maxDocsPerChunk + ") * blockSize (" + blockSize + ") < 16 - this can trigger OOM with -Dtests.heapsize=30g", product >= 16);
  }
  BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BPostingsBytes1"));
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }
  IndexWriter w = new IndexWriter(dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
          .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
          .setRAMBufferSizeMB(256.0)
          .setMergeScheduler(new ConcurrentMergeScheduler())
          .setMergePolicy(newLogMergePolicy(false, 10))
          .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
          .setCodec(TestUtil.getDefaultCodec()));
  MergePolicy mp = w.getConfig().getMergePolicy();
  if (mp instanceof LogByteSizeMergePolicy) {
    // 1 petabyte:
    ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
  }
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  ft.setOmitNorms(true);
  MyTokenStream tokenStream = new MyTokenStream();
  Field field = new Field("field", tokenStream, ft);
  doc.add(field);
  final int numDocs = 1000;
  for (int i = 0; i < numDocs; i++) {
    if (i % 2 == 1) {
      // trick blockPF's little optimization
      tokenStream.n = 65536;
    } else {
      tokenStream.n = 65537;
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  w.close();
  DirectoryReader oneThousand = DirectoryReader.open(dir);
  DirectoryReader[] subReaders = new DirectoryReader[1000];
  Arrays.fill(subReaders, oneThousand);
  BaseDirectoryWrapper dir2 = newFSDirectory(createTempDir("2BPostingsBytes2"));
  if (dir2 instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir2).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }
  IndexWriter w2 = new IndexWriter(dir2, new IndexWriterConfig(null));
  TestUtil.addIndexesSlowly(w2, subReaders);
  w2.forceMerge(1);
  w2.close();
  oneThousand.close();
  DirectoryReader oneMillion = DirectoryReader.open(dir2);
  subReaders = new DirectoryReader[2000];
  Arrays.fill(subReaders, oneMillion);
  BaseDirectoryWrapper dir3 = newFSDirectory(createTempDir("2BPostingsBytes3"));
  if (dir3 instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir3).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }
  IndexWriter w3 = new IndexWriter(dir3, new IndexWriterConfig(null));
  TestUtil.addIndexesSlowly(w3, subReaders);
  w3.forceMerge(1);
  w3.close();
  oneMillion.close();
  dir.close();
  dir2.close();
  dir3.close();
}
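The test's FieldType asks for IndexOptions.DOCS_AND_FREQS with norms omitted, which keeps document IDs and term frequencies but no positions or offsets in the postings; phrase and span queries on such a field will not work, while term and boolean queries (and TF-based scoring) still do. A minimal stand-alone sketch of that configuration follows, assuming a Lucene 6.x/7.x classpath; the class name, field name, and text are illustrative.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class FreqsOnlyFieldSketch {
  public static void main(String[] args) throws Exception {
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); // docs + term freqs, no positions/offsets
    ft.setOmitNorms(true);                           // no length-normalization data
    ft.freeze();

    Directory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    Document doc = new Document();
    doc.add(new Field("body", "some analyzed text goes here", ft));
    w.addDocument(doc);
    w.close();
    dir.close();
  }
}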
Use of org.apache.lucene.document.FieldType in project lucene-solr by apache.
The class Test2BTerms, method test2BTerms.
public void test2BTerms() throws IOException {
  System.out.println("Starting Test2B");
  final long TERM_COUNT = ((long) Integer.MAX_VALUE) + 100000000;
  final int TERMS_PER_DOC = TestUtil.nextInt(random(), 100000, 1000000);
  List<BytesRef> savedTerms = null;
  BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BTerms"));
  //MockDirectoryWrapper dir = newFSDirectory(new File("/p/lucene/indices/2bindex"));
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }
  // don't double-checkindex
  dir.setCheckIndexOnClose(false);
  if (true) {
    IndexWriter w = new IndexWriter(dir,
        new IndexWriterConfig(new MockAnalyzer(random()))
            .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .setRAMBufferSizeMB(256.0)
            .setMergeScheduler(new ConcurrentMergeScheduler())
            .setMergePolicy(newLogMergePolicy(false, 10))
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
            .setCodec(TestUtil.getDefaultCodec()));
    MergePolicy mp = w.getConfig().getMergePolicy();
    if (mp instanceof LogByteSizeMergePolicy) {
      // 1 petabyte:
      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024 * 1024 * 1024);
    }
    Document doc = new Document();
    final MyTokenStream ts = new MyTokenStream(random(), TERMS_PER_DOC);
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setIndexOptions(IndexOptions.DOCS);
    customType.setOmitNorms(true);
    Field field = new Field("field", ts, customType);
    doc.add(field);
    //w.setInfoStream(System.out);
    final int numDocs = (int) (TERM_COUNT / TERMS_PER_DOC);
    System.out.println("TERMS_PER_DOC=" + TERMS_PER_DOC);
    System.out.println("numDocs=" + numDocs);
    for (int i = 0; i < numDocs; i++) {
      final long t0 = System.currentTimeMillis();
      w.addDocument(doc);
      System.out.println(i + " of " + numDocs + " " + (System.currentTimeMillis() - t0) + " msec");
    }
    savedTerms = ts.savedTerms;
    System.out.println("TEST: full merge");
    w.forceMerge(1);
    System.out.println("TEST: close writer");
    w.close();
  }
  System.out.println("TEST: open reader");
  final IndexReader r = DirectoryReader.open(dir);
  if (savedTerms == null) {
    savedTerms = findTerms(r);
  }
  final int numSavedTerms = savedTerms.size();
  final List<BytesRef> bigOrdTerms = new ArrayList<>(savedTerms.subList(numSavedTerms - 10, numSavedTerms));
  System.out.println("TEST: test big ord terms...");
  testSavedTerms(r, bigOrdTerms);
  System.out.println("TEST: test all saved terms...");
  testSavedTerms(r, savedTerms);
  r.close();
  System.out.println("TEST: now CheckIndex...");
  CheckIndex.Status status = TestUtil.checkIndex(dir);
  final long tc = status.segmentInfos.get(0).termIndexStatus.termCount;
  assertTrue("count " + tc + " is not > " + Integer.MAX_VALUE, tc > Integer.MAX_VALUE);
  dir.close();
  System.out.println("TEST: done!");
}
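Like Test2BPostingsBytes, this test hands the Field a pre-built TokenStream instead of a String, via the Field(String, TokenStream, FieldType) constructor, paired with a docs-only, norms-free FieldType. The sketch below shows that pattern with a tiny hand-written TokenStream; it is an assumption-laden illustration (Lucene 6.x/7.x classpath, made-up class and field names), not code from the test.

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class TokenStreamFieldSketch {

  // A tiny TokenStream that emits three synthetic terms; the real tests use a
  // much larger generator (MyTokenStream) to reach billions of terms/postings.
  static final class ThreeTermsTokenStream extends TokenStream {
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private int count = 0;

    @Override
    public boolean incrementToken() {
      if (count == 3) {
        return false;
      }
      clearAttributes();
      termAtt.setEmpty().append("term" + count++);
      return true;
    }

    @Override
    public void reset() throws IOException {
      super.reset();
      count = 0;
    }
  }

  public static void main(String[] args) throws Exception {
    // Docs-only, norms-free field type: postings keep document IDs only.
    FieldType docsOnly = new FieldType(TextField.TYPE_NOT_STORED);
    docsOnly.setIndexOptions(IndexOptions.DOCS);
    docsOnly.setOmitNorms(true);
    docsOnly.freeze();

    Directory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    Document doc = new Document();
    // Field built from a TokenStream: the analyzer is bypassed for this field.
    doc.add(new Field("field", new ThreeTermsTokenStream(), docsOnly));
    w.addDocument(doc);
    w.close();
    dir.close();
  }
}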