Use of org.apache.lucene.util.Version in project jackrabbit-oak by Apache.
Class NodeStateAnalyzerFactory, method createAnalyzerViaReflection.
/**
 * Builds an {@link Analyzer} by reflectively invoking a public constructor of the class
 * named in the {@link LuceneIndexConstants#ANL_CLASS} property of {@code state}.
 *
 * <p>When the resolved class is a {@link StopwordAnalyzerBase} subtype and {@code state}
 * has a {@link LuceneIndexConstants#ANL_STOPWORDS} child node, a stopword set is loaded
 * from that child and the {@code (Version, CharArraySet)} constructor is used; otherwise
 * the {@code (Version)} constructor is used.
 *
 * @param state node state holding the analyzer configuration
 * @return the newly constructed analyzer
 * @throws RuntimeException if loading the stopwords fails with an {@link IOException}, or
 *         if the constructor cannot be found or invoked
 */
private Analyzer createAnalyzerViaReflection(NodeState state) {
    final String className = state.getString(LuceneIndexConstants.ANL_CLASS);
    final Class<? extends Analyzer> analyzerType = defaultLoader.findClass(className, Analyzer.class);
    final Version luceneVersion = getVersion(state);

    // Stopwords are only looked up for analyzers that can accept them.
    CharArraySet stopwordSet = null;
    if (StopwordAnalyzerBase.class.isAssignableFrom(analyzerType)
            && state.hasChildNode(LuceneIndexConstants.ANL_STOPWORDS)) {
        final NodeState stopwordsNode = state.getChildNode(LuceneIndexConstants.ANL_STOPWORDS);
        try {
            stopwordSet = loadStopwordSet(stopwordsNode, LuceneIndexConstants.ANL_STOPWORDS, luceneVersion);
        } catch (IOException e) {
            throw new RuntimeException("Error occurred while loading stopwords", e);
        }
    }

    try {
        if (stopwordSet == null) {
            return analyzerType.getConstructor(Version.class).newInstance(luceneVersion);
        }
        return analyzerType
                .getConstructor(Version.class, CharArraySet.class)
                .newInstance(luceneVersion, stopwordSet);
    } catch (NoSuchMethodException
            | InstantiationException
            | IllegalAccessException
            | InvocationTargetException e) {
        throw new RuntimeException("Error occurred while instantiating Analyzer for " + analyzerType, e);
    }
}
Use of org.apache.lucene.util.Version in project lucene-solr by Apache.
Class Lucene50SegmentInfoFormat, method read.
/**
 * Reads the segment-info file of {@code segment} from {@code dir} and decodes it into a
 * {@link SegmentInfo}.
 *
 * <p>Fields are consumed from the input in a fixed order: index header, three version
 * ints, doc count, compound-file flag, diagnostics, file set, attributes. Any
 * {@link Throwable} raised while decoding is captured and passed to
 * {@code CodecUtil.checkFooter} together with the input, instead of being rethrown here.
 *
 * @param dir directory containing the segment-info file
 * @param segment segment name used to derive the file name
 * @param segmentID expected segment id, checked as part of the index header
 * @param context I/O context used to open the file
 * @return the decoded segment info
 * @throws IOException if the file cannot be opened or read; a negative doc count is
 *         reported as a {@link CorruptIndexException}
 */
@Override
public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
    final String siFileName =
            IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
    try (ChecksumIndexInput in = dir.openChecksumInput(siFileName, context)) {
        SegmentInfo result = null;
        Throwable failure = null;
        try {
            CodecUtil.checkIndexHeader(
                    in,
                    Lucene50SegmentInfoFormat.CODEC_NAME,
                    Lucene50SegmentInfoFormat.VERSION_START,
                    Lucene50SegmentInfoFormat.VERSION_CURRENT,
                    segmentID,
                    "");

            // The three version components are stored as consecutive ints.
            final int major = in.readInt();
            final int minor = in.readInt();
            final int bugfix = in.readInt();
            final Version segmentVersion = Version.fromBits(major, minor, bugfix);

            final int maxDoc = in.readInt();
            if (maxDoc < 0) {
                throw new CorruptIndexException("invalid docCount: " + maxDoc, in);
            }

            final boolean compound = in.readByte() == SegmentInfo.YES;
            final Map<String, String> diagnosticsMap = in.readMapOfStrings();
            final Set<String> segmentFiles = in.readSetOfStrings();
            final Map<String, String> attributeMap = in.readMapOfStrings();

            result = new SegmentInfo(dir, segmentVersion, null, segment, maxDoc, compound, null,
                    diagnosticsMap, segmentID, attributeMap, null);
            result.setFiles(segmentFiles);
        } catch (Throwable t) {
            // Remember the failure so the footer check can take it into account.
            failure = t;
        } finally {
            CodecUtil.checkFooter(in, failure);
        }
        return result;
    }
}
Use of org.apache.lucene.util.Version in project lucene-solr by Apache.
Class SegmentInfos, method readCommit.
/**
 * Read the commit from the provided {@link ChecksumIndexInput}.
 *
 * <p>Values are consumed from {@code input} strictly in the order written below: magic,
 * header, commit id, generation suffix, Lucene version, (optionally) the index-created
 * major version, commit version, counter, segment count, (optionally) the minimum segment
 * version, one record per segment, user data, and finally the footer.
 *
 * @param directory directory handed to each segment's codec to read its {@link SegmentInfo}
 * @param input source of the commit data
 * @param generation commit generation; checked against the header suffix and stored on the result
 * @return the populated {@link SegmentInfos}
 * @throws IndexFormatTooOldException if the magic number does not match, the recorded Lucene
 *         version is before 6.0.0, or a segment record carries no id
 * @throws CorruptIndexException if the segment count or a deletion count is out of range, a
 *         hasID byte is invalid, a segment's version contradicts the recorded versions, or
 *         the total document count exceeds {@code IndexWriter.getActualMaxDocs()}
 * @throws IOException declared for failures while reading from {@code input}
 */
public static final SegmentInfos readCommit(Directory directory, ChecksumIndexInput input, long generation) throws IOException {
// NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need
// to read the magic ourselves.
int magic = input.readInt();
if (magic != CodecUtil.CODEC_MAGIC) {
throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
}
// The magic was consumed above, so the remaining header is checked with the "no magic" variant.
int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_53, VERSION_CURRENT);
// Commit id: a fixed-length byte array of StringHelper.ID_LENGTH bytes.
byte[] id = new byte[StringHelper.ID_LENGTH];
input.readBytes(id, 0, id.length);
// The header suffix is the generation rendered in radix Character.MAX_RADIX.
CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
// Version of the commit, stored as three consecutive VInts.
Version luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
if (luceneVersion.onOrAfter(Version.LUCENE_6_0_0) == false) {
// TODO: should we check indexCreatedVersion instead?
throw new IndexFormatTooOldException(input, "this index is too old (version: " + luceneVersion + ")");
}
// The created-version major is only present in the stream from format VERSION_70 on;
// older formats fall back to 6.
int indexCreatedVersion = 6;
if (format >= VERSION_70) {
indexCreatedVersion = input.readVInt();
}
SegmentInfos infos = new SegmentInfos(indexCreatedVersion);
infos.id = id;
infos.generation = generation;
infos.lastGeneration = generation;
infos.luceneVersion = luceneVersion;
infos.version = input.readLong();
//System.out.println("READ sis version=" + infos.version);
infos.counter = input.readInt();
int numSegments = input.readInt();
if (numSegments < 0) {
throw new CorruptIndexException("invalid segment count: " + numSegments, input);
}
// The minimum segment version is only stored when the commit has at least one segment.
if (numSegments > 0) {
infos.minSegmentLuceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
} else {
// else leave as null: no segments
}
// Accumulated as a long and compared against the max-docs limit after the footer check.
long totalDocs = 0;
for (int seg = 0; seg < numSegments; seg++) {
String segName = input.readString();
final byte[] segmentID;
// hasID: 1 means a segment id follows, 0 is rejected as too old, anything else is corruption.
byte hasID = input.readByte();
if (hasID == 1) {
segmentID = new byte[StringHelper.ID_LENGTH];
input.readBytes(segmentID, 0, segmentID.length);
} else if (hasID == 0) {
throw new IndexFormatTooOldException(input, "Segment is from Lucene 4.x");
} else {
throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input);
}
// NOTE(review): the header check above was given VERSION_53 as its minimum, so this
// flag looks like it is always false here — confirm.
Codec codec = readCodec(input, format < VERSION_53);
// The per-segment info is read from the directory by the segment's own codec,
// not from this input.
SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ);
info.setCodec(codec);
totalDocs += info.maxDoc();
long delGen = input.readLong();
int delCount = input.readInt();
if (delCount < 0 || delCount > info.maxDoc()) {
throw new CorruptIndexException("invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input);
}
long fieldInfosGen = input.readLong();
long dvGen = input.readLong();
SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen);
siPerCommit.setFieldInfosFiles(input.readSetOfStrings());
// Doc-values update files: a count followed by (int key, set of file names) pairs.
final Map<Integer, Set<String>> dvUpdateFiles;
final int numDVFields = input.readInt();
if (numDVFields == 0) {
dvUpdateFiles = Collections.emptyMap();
} else {
Map<Integer, Set<String>> map = new HashMap<>(numDVFields);
for (int i = 0; i < numDVFields; i++) {
map.put(input.readInt(), input.readSetOfStrings());
}
dvUpdateFiles = Collections.unmodifiableMap(map);
}
siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles);
infos.add(siPerCommit);
// Cross-check the segment's own version against what the commit recorded.
// minSegmentLuceneVersion was read above because numSegments > 0 inside this loop.
Version segmentVersion = info.getVersion();
if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
throw new CorruptIndexException("segments file recorded minSegmentLuceneVersion=" + infos.minSegmentLuceneVersion + " but segment=" + info + " has older version=" + segmentVersion, input);
}
// The next two checks apply only when the index was created with major version 7 or later.
if (infos.indexCreatedVersionMajor >= 7 && segmentVersion.major < infos.indexCreatedVersionMajor) {
throw new CorruptIndexException("segments file recorded indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor + " but segment=" + info + " has older version=" + segmentVersion, input);
}
if (infos.indexCreatedVersionMajor >= 7 && info.getMinVersion() == null) {
throw new CorruptIndexException("segments infos must record minVersion with indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor, input);
}
}
infos.userData = input.readMapOfStrings();
CodecUtil.checkFooter(input);
// LUCENE-6299: check we are in bounds
if (totalDocs > IndexWriter.getActualMaxDocs()) {
throw new CorruptIndexException("Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + totalDocs, input);
}
return infos;
}
Use of org.apache.lucene.util.Version in project lucene-solr by Apache.
Class TestSimilarityBase, method testLengthEncodingBackwardCompatibility.
/**
 * For a randomly picked similarity, indexes a single document whose field holds 1, 2 or 4
 * tokens and asserts that the length value reported by the similarity's scorer equals the
 * token count. The check is repeated for an index whose created-version major is that of
 * Lucene 6.0.0 and for one created with the latest version.
 */
public void testLengthEncodingBackwardCompatibility() throws IOException {
    final Similarity sim = RandomPicks.randomFrom(random(), sims);
    final int[] createdVersionMajors = { Version.LUCENE_6_0_0.major, Version.LATEST.major };
    for (int createdMajor : createdVersionMajors) {
        // These lengths are small enough to be encoded accurately in both cases.
        final int[] tokenCounts = { 1, 2, 4 };
        for (int tokenCount : tokenCounts) {
            Directory directory = newDirectory();
            // Committing empty SegmentInfos stamps the created-version on the directory.
            SegmentInfos emptyCommit = new SegmentInfos(createdMajor);
            emptyCommit.commit(directory);
            IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig().setSimilarity(sim));

            // Field text is "b" repeated tokenCount times, separated by single spaces.
            String fieldText = IntStream.range(0, tokenCount)
                    .mapToObj(ignored -> "b")
                    .collect(Collectors.joining(" "));
            Document document = new Document();
            document.add(new TextField("foo", fieldText, Store.NO));
            writer.addDocument(document);

            IndexReader indexReader = DirectoryReader.open(writer);
            IndexSearcher indexSearcher = newSearcher(indexReader);
            indexSearcher.setSimilarity(sim);

            Term queryTerm = new Term("foo", "b");
            TermContext termContext = TermContext.build(indexReader.getContext(), queryTerm);
            SimWeight weight = sim.computeWeight(
                    1f,
                    indexSearcher.collectionStatistics("foo"),
                    indexSearcher.termStatistics(queryTerm, termContext));
            SimilarityBase.BasicSimScorer scorer =
                    (SimilarityBase.BasicSimScorer) sim.simScorer(weight, indexReader.leaves().get(0));
            float reportedLength = scorer.getLengthValue(0);
            assertEquals(tokenCount, (int) reportedLength);

            writer.close();
            indexReader.close();
            directory.close();
        }
    }
}
Use of org.apache.lucene.util.Version in project lucene-solr by Apache.
Class TestBM25Similarity, method testLengthEncodingBackwardCompatibility.
/**
 * Using {@link BM25Similarity}, indexes a single document whose field holds 1, 2 or 4
 * tokens and asserts that the "fieldLength" entry of the query explanation equals the
 * token count. The check is repeated for an index whose created-version major is that of
 * Lucene 6.0.0 and for one created with the latest version.
 */
public void testLengthEncodingBackwardCompatibility() throws IOException {
    final Similarity bm25 = new BM25Similarity();
    final int[] createdVersionMajors = { Version.LUCENE_6_0_0.major, Version.LATEST.major };
    for (int createdMajor : createdVersionMajors) {
        // These lengths are small enough to be encoded accurately in both cases.
        final int[] tokenCounts = { 1, 2, 4 };
        for (int tokenCount : tokenCounts) {
            Directory directory = newDirectory();
            // Committing empty SegmentInfos stamps the created-version on the directory.
            SegmentInfos emptyCommit = new SegmentInfos(createdMajor);
            emptyCommit.commit(directory);
            IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig().setSimilarity(bm25));

            // Field text is "b" repeated tokenCount times, separated by single spaces.
            String fieldText = IntStream.range(0, tokenCount)
                    .mapToObj(ignored -> "b")
                    .collect(Collectors.joining(" "));
            Document document = new Document();
            document.add(new TextField("foo", fieldText, Store.NO));
            writer.addDocument(document);

            IndexReader indexReader = DirectoryReader.open(writer);
            IndexSearcher indexSearcher = newSearcher(indexReader);
            indexSearcher.setSimilarity(bm25);

            TermQuery query = new TermQuery(new Term("foo", "b"));
            Explanation explanation = indexSearcher.explain(query, 0);
            Explanation fieldLength = findExplanation(explanation, "fieldLength");
            assertNotNull(fieldLength);
            assertEquals(fieldLength.toString(), tokenCount, (int) fieldLength.getValue());

            writer.close();
            indexReader.close();
            directory.close();
        }
    }
}
Aggregations