Search in sources :

Example 1 with Version

use of org.apache.lucene.util.Version in project jackrabbit-oak by apache.

the class NodeStateAnalyzerFactory method createAnalyzerViaReflection.

/**
 * Builds an {@link Analyzer} reflectively from the class named by the
 * {@code ANL_CLASS} property of the given node state.
 *
 * <p>When the analyzer class derives from {@link StopwordAnalyzerBase} and the state has an
 * {@code ANL_STOPWORDS} child node, the stopword set is loaded from that child and the
 * {@code (Version, CharArraySet)} constructor is used; otherwise the single-argument
 * {@code (Version)} constructor is used.
 *
 * @param state node state holding the analyzer configuration
 * @return the newly created analyzer instance
 * @throws RuntimeException if the stopwords cannot be loaded, or if the expected constructor
 *         is missing or fails when invoked
 */
private Analyzer createAnalyzerViaReflection(NodeState state) {
    final String analyzerClassName = state.getString(LuceneIndexConstants.ANL_CLASS);
    final Class<? extends Analyzer> analyzerClazz = defaultLoader.findClass(analyzerClassName, Analyzer.class);
    final Version luceneVersion = getVersion(state);

    // Stopwords are only consulted for analyzers that can actually accept them.
    final boolean stopwordsConfigured = StopwordAnalyzerBase.class.isAssignableFrom(analyzerClazz)
            && state.hasChildNode(LuceneIndexConstants.ANL_STOPWORDS);

    CharArraySet stopwordSet = null;
    if (stopwordsConfigured) {
        try {
            stopwordSet = loadStopwordSet(state.getChildNode(LuceneIndexConstants.ANL_STOPWORDS), LuceneIndexConstants.ANL_STOPWORDS, luceneVersion);
        } catch (IOException e) {
            throw new RuntimeException("Error occurred while loading stopwords", e);
        }
    }

    try {
        if (stopwordSet == null) {
            return analyzerClazz.getConstructor(Version.class).newInstance(luceneVersion);
        }
        return analyzerClazz.getConstructor(Version.class, CharArraySet.class).newInstance(luceneVersion, stopwordSet);
    } catch (NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e) {
        // Every reflective failure is reported the same way, with the original cause attached.
        throw new RuntimeException("Error occurred while instantiating Analyzer for " + analyzerClazz, e);
    }
}
Also used : CharArraySet(org.apache.lucene.analysis.util.CharArraySet) Version(org.apache.lucene.util.Version) StopwordAnalyzerBase(org.apache.lucene.analysis.util.StopwordAnalyzerBase) IOException(java.io.IOException) InvocationTargetException(java.lang.reflect.InvocationTargetException)

Example 2 with Version

use of org.apache.lucene.util.Version in project lucene-solr by apache.

the class Lucene50SegmentInfoFormat method read.

/**
 * Reads the segment info file of {@code segment} from {@code dir}.
 *
 * <p>After the index header is checked, the fields are consumed in this order: three ints
 * for the version, the document count, the compound-file flag byte, the diagnostics map,
 * the file set and the attributes map. Any throwable raised while decoding is captured and
 * handed to {@code CodecUtil.checkFooter} in the {@code finally} clause.
 *
 * @param dir       directory containing the segment info file
 * @param segment   segment name used to derive the file name
 * @param segmentID segment id passed to the index header check
 * @param context   I/O context used to open the file
 * @return the decoded segment info
 * @throws IOException if the file cannot be read or fails validation
 */
@Override
public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
    final String siFileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
    try (ChecksumIndexInput in = dir.openChecksumInput(siFileName, context)) {
        SegmentInfo result = null;
        Throwable failure = null;
        try {
            CodecUtil.checkIndexHeader(in, Lucene50SegmentInfoFormat.CODEC_NAME, Lucene50SegmentInfoFormat.VERSION_START, Lucene50SegmentInfoFormat.VERSION_CURRENT, segmentID, "");

            // The version is stored as three consecutive ints.
            final int first = in.readInt();
            final int second = in.readInt();
            final int third = in.readInt();
            final Version version = Version.fromBits(first, second, third);

            final int docCount = in.readInt();
            if (docCount < 0) {
                throw new CorruptIndexException("invalid docCount: " + docCount, in);
            }

            final boolean isCompoundFile = in.readByte() == SegmentInfo.YES;
            final Map<String, String> diagnostics = in.readMapOfStrings();
            final Set<String> files = in.readSetOfStrings();
            final Map<String, String> attributes = in.readMapOfStrings();

            result = new SegmentInfo(dir, version, null, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, null);
            result.setFiles(files);
        } catch (Throwable t) {
            // Remember the failure so the footer check can take it into account.
            failure = t;
        } finally {
            CodecUtil.checkFooter(in, failure);
        }
        return result;
    }
}
Also used : ChecksumIndexInput(org.apache.lucene.store.ChecksumIndexInput) Version(org.apache.lucene.util.Version) SegmentInfo(org.apache.lucene.index.SegmentInfo) CorruptIndexException(org.apache.lucene.index.CorruptIndexException)

Example 3 with Version

use of org.apache.lucene.util.Version in project lucene-solr by apache.

the class SegmentInfos method readCommit.

/**
 * Read the commit from the provided {@link ChecksumIndexInput}.
 *
 * <p>The input is consumed strictly sequentially: magic, codec header, id bytes, header
 * suffix, Lucene version, (optionally) the index-created major version, the commit-level
 * fields, one record per segment, the user data map and finally the footer.
 *
 * @param directory directory handed to each segment's codec when reading its segment info
 * @param input the input positioned at the start of the commit data
 * @param generation the commit generation; its base-{@code Character.MAX_RADIX} string form
 *        is checked as the index header suffix and it is stored on the returned instance
 * @return the populated {@link SegmentInfos}
 * @throws IndexFormatTooOldException if the magic does not match, the recorded Lucene version
 *         is before 6.0.0, or a segment's hasID byte is 0
 * @throws CorruptIndexException if the segment count is negative, a hasID byte is neither 0
 *         nor 1, a deletion count is out of range, a segment's version is inconsistent with
 *         the commit-level version data, or the total document count exceeds the limit
 * @throws IOException on any other read failure
 */
public static final SegmentInfos readCommit(Directory directory, ChecksumIndexInput input, long generation) throws IOException {
    // NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need
    // to read the magic ourselves.
    int magic = input.readInt();
    if (magic != CodecUtil.CODEC_MAGIC) {
        throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
    }
    // The magic was consumed above, so the remaining header is checked without it.
    int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_53, VERSION_CURRENT);
    byte[] id = new byte[StringHelper.ID_LENGTH];
    input.readBytes(id, 0, id.length);
    CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
    // Version is stored as three vInts.
    Version luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
    if (luceneVersion.onOrAfter(Version.LUCENE_6_0_0) == false) {
        // TODO: should we check indexCreatedVersion instead?
        throw new IndexFormatTooOldException(input, "this index is too old (version: " + luceneVersion + ")");
    }
    // The created-version major is only present from format VERSION_70 on; otherwise default to 6.
    int indexCreatedVersion = 6;
    if (format >= VERSION_70) {
        indexCreatedVersion = input.readVInt();
    }
    SegmentInfos infos = new SegmentInfos(indexCreatedVersion);
    infos.id = id;
    infos.generation = generation;
    infos.lastGeneration = generation;
    infos.luceneVersion = luceneVersion;
    infos.version = input.readLong();
    //System.out.println("READ sis version=" + infos.version);
    infos.counter = input.readInt();
    int numSegments = input.readInt();
    if (numSegments < 0) {
        throw new CorruptIndexException("invalid segment count: " + numSegments, input);
    }
    // The minimum segment version is only recorded when there is at least one segment.
    if (numSegments > 0) {
        infos.minSegmentLuceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
    } else {
    // else leave as null: no segments
    }
    // Accumulated as a long and compared against the limit after the footer check below.
    long totalDocs = 0;
    for (int seg = 0; seg < numSegments; seg++) {
        String segName = input.readString();
        final byte[] segmentID;
        // hasID: 1 = id bytes follow, 0 = rejected as too old, anything else = corrupt.
        byte hasID = input.readByte();
        if (hasID == 1) {
            segmentID = new byte[StringHelper.ID_LENGTH];
            input.readBytes(segmentID, 0, segmentID.length);
        } else if (hasID == 0) {
            throw new IndexFormatTooOldException(input, "Segment is from Lucene 4.x");
        } else {
            throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input);
        }
        // NOTE(review): the header check above uses VERSION_53 as its minimum, so
        // `format < VERSION_53` looks like it is always false here - confirm.
        Codec codec = readCodec(input, format < VERSION_53);
        // The per-segment info is read via the codec from the directory, not from `input`.
        SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ);
        info.setCodec(codec);
        totalDocs += info.maxDoc();
        long delGen = input.readLong();
        int delCount = input.readInt();
        if (delCount < 0 || delCount > info.maxDoc()) {
            throw new CorruptIndexException("invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input);
        }
        long fieldInfosGen = input.readLong();
        long dvGen = input.readLong();
        SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen);
        siPerCommit.setFieldInfosFiles(input.readSetOfStrings());
        // Doc-values update files: a count followed by (int key, set of file names) pairs.
        final Map<Integer, Set<String>> dvUpdateFiles;
        final int numDVFields = input.readInt();
        if (numDVFields == 0) {
            dvUpdateFiles = Collections.emptyMap();
        } else {
            Map<Integer, Set<String>> map = new HashMap<>(numDVFields);
            for (int i = 0; i < numDVFields; i++) {
                map.put(input.readInt(), input.readSetOfStrings());
            }
            dvUpdateFiles = Collections.unmodifiableMap(map);
        }
        siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles);
        infos.add(siPerCommit);
        // Cross-check the segment's own version against the commit-level version data.
        Version segmentVersion = info.getVersion();
        if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
            throw new CorruptIndexException("segments file recorded minSegmentLuceneVersion=" + infos.minSegmentLuceneVersion + " but segment=" + info + " has older version=" + segmentVersion, input);
        }
        if (infos.indexCreatedVersionMajor >= 7 && segmentVersion.major < infos.indexCreatedVersionMajor) {
            throw new CorruptIndexException("segments file recorded indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor + " but segment=" + info + " has older version=" + segmentVersion, input);
        }
        if (infos.indexCreatedVersionMajor >= 7 && info.getMinVersion() == null) {
            throw new CorruptIndexException("segments infos must record minVersion with indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor, input);
        }
    }
    infos.userData = input.readMapOfStrings();
    CodecUtil.checkFooter(input);
    // LUCENE-6299: check we are in bounds
    if (totalDocs > IndexWriter.getActualMaxDocs()) {
        throw new CorruptIndexException("Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + totalDocs, input);
    }
    return infos;
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) Codec(org.apache.lucene.codecs.Codec) Version(org.apache.lucene.util.Version)

Example 4 with Version

use of org.apache.lucene.util.Version in project lucene-solr by apache.

the class TestSimilarityBase method testLengthEncodingBackwardCompatibility.

/**
 * For a randomly picked similarity, checks that field lengths 1, 2 and 4 are reported
 * exactly by the similarity's scorer, for an index whose created-version major is either
 * that of {@code LUCENE_6_0_0} or that of {@code LATEST}.
 */
public void testLengthEncodingBackwardCompatibility() throws IOException {
    final Similarity sim = RandomPicks.randomFrom(random(), sims);
    final int[] createdMajors = { Version.LUCENE_6_0_0.major, Version.LATEST.major };
    // these length values are encoded accurately on both cases
    final int[] fieldLengths = { 1, 2, 4 };

    for (int createdMajor : createdMajors) {
        for (int expectedLength : fieldLengths) {
            Directory directory = newDirectory();
            // set the version on the directory
            new SegmentInfos(createdMajor).commit(directory);
            IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig().setSimilarity(sim));

            // Field text is the token "b" repeated expectedLength times, separated by spaces.
            StringBuilder text = new StringBuilder();
            for (int i = 0; i < expectedLength; i++) {
                if (i > 0) {
                    text.append(' ');
                }
                text.append('b');
            }
            Document document = new Document();
            document.add(new TextField("foo", text.toString(), Store.NO));
            writer.addDocument(document);

            IndexReader indexReader = DirectoryReader.open(writer);
            IndexSearcher indexSearcher = newSearcher(indexReader);
            indexSearcher.setSimilarity(sim);

            Term fooB = new Term("foo", "b");
            TermContext termContext = TermContext.build(indexReader.getContext(), fooB);
            SimWeight weight = sim.computeWeight(1f, indexSearcher.collectionStatistics("foo"), indexSearcher.termStatistics(fooB, termContext));
            SimilarityBase.BasicSimScorer scorer = (SimilarityBase.BasicSimScorer) sim.simScorer(weight, indexReader.leaves().get(0));
            assertEquals(expectedLength, (int) scorer.getLengthValue(0));

            writer.close();
            indexReader.close();
            directory.close();
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) Query(org.apache.lucene.search.Query) RandomPicks(com.carrotsearch.randomizedtesting.generators.RandomPicks) FieldType(org.apache.lucene.document.FieldType) Term(org.apache.lucene.index.Term) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) Directory(org.apache.lucene.store.Directory) Store(org.apache.lucene.document.Field.Store) TermStatistics(org.apache.lucene.search.TermStatistics) TopDocs(org.apache.lucene.search.TopDocs) Explanation(org.apache.lucene.search.Explanation) BytesRef(org.apache.lucene.util.BytesRef) DirectoryReader(org.apache.lucene.index.DirectoryReader) IOException(java.io.IOException) TermContext(org.apache.lucene.index.TermContext) Collectors(java.util.stream.Collectors) Version(org.apache.lucene.util.Version) SegmentInfos(org.apache.lucene.index.SegmentInfos) List(java.util.List) FieldInvertState(org.apache.lucene.index.FieldInvertState) IndexWriter(org.apache.lucene.index.IndexWriter) CollectionStatistics(org.apache.lucene.search.CollectionStatistics) TermQuery(org.apache.lucene.search.TermQuery) Field(org.apache.lucene.document.Field) LuceneTestCase(org.apache.lucene.util.LuceneTestCase) TextField(org.apache.lucene.document.TextField) IndexOptions(org.apache.lucene.index.IndexOptions) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexReader(org.apache.lucene.index.IndexReader) IndexSearcher(org.apache.lucene.search.IndexSearcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) SegmentInfos(org.apache.lucene.index.SegmentInfos) SimWeight(org.apache.lucene.search.similarities.Similarity.SimWeight) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TermContext(org.apache.lucene.index.TermContext) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) 
IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory)

Example 5 with Version

use of org.apache.lucene.util.Version in project lucene-solr by apache.

the class TestBM25Similarity method testLengthEncodingBackwardCompatibility.

/**
 * Checks that, with {@link BM25Similarity}, the "fieldLength" detail of a term query's
 * explanation equals the real field length for lengths 1, 2 and 4, for an index whose
 * created-version major is either that of {@code LUCENE_6_0_0} or that of {@code LATEST}.
 */
public void testLengthEncodingBackwardCompatibility() throws IOException {
    final Similarity bm25 = new BM25Similarity();
    final int[] createdMajors = { Version.LUCENE_6_0_0.major, Version.LATEST.major };
    // these length values are encoded accurately on both cases
    final int[] fieldLengths = { 1, 2, 4 };

    for (int createdMajor : createdMajors) {
        for (int expectedLength : fieldLengths) {
            Directory directory = newDirectory();
            // set the version on the directory
            new SegmentInfos(createdMajor).commit(directory);
            IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig().setSimilarity(bm25));

            // Field text is the token "b" repeated expectedLength times, separated by spaces.
            StringBuilder text = new StringBuilder();
            for (int i = 0; i < expectedLength; i++) {
                if (i > 0) {
                    text.append(' ');
                }
                text.append('b');
            }
            Document document = new Document();
            document.add(new TextField("foo", text.toString(), Store.NO));
            writer.addDocument(document);

            IndexReader indexReader = DirectoryReader.open(writer);
            IndexSearcher indexSearcher = newSearcher(indexReader);
            indexSearcher.setSimilarity(bm25);

            Explanation explanation = indexSearcher.explain(new TermQuery(new Term("foo", "b")), 0);
            Explanation fieldLength = findExplanation(explanation, "fieldLength");
            assertNotNull(fieldLength);
            assertEquals(fieldLength.toString(), expectedLength, (int) fieldLength.getValue());

            writer.close();
            indexReader.close();
            directory.close();
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) Explanation(org.apache.lucene.search.Explanation) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Version(org.apache.lucene.util.Version) SegmentInfos(org.apache.lucene.index.SegmentInfos) Document(org.apache.lucene.document.Document) IndexWriter(org.apache.lucene.index.IndexWriter) TermQuery(org.apache.lucene.search.TermQuery) Directory(org.apache.lucene.store.Directory) Store(org.apache.lucene.document.Field.Store) LuceneTestCase(org.apache.lucene.util.LuceneTestCase) TextField(org.apache.lucene.document.TextField) IndexReader(org.apache.lucene.index.IndexReader) IndexSearcher(org.apache.lucene.search.IndexSearcher) IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) SegmentInfos(org.apache.lucene.index.SegmentInfos) Explanation(org.apache.lucene.search.Explanation) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) Directory(org.apache.lucene.store.Directory)

Aggregations

Version (org.apache.lucene.util.Version): 22; Directory (org.apache.lucene.store.Directory): 7; Sort (org.apache.lucene.search.Sort): 6; SortField (org.apache.lucene.search.SortField): 5; SortedNumericSortField (org.apache.lucene.search.SortedNumericSortField): 5; SortedSetSortField (org.apache.lucene.search.SortedSetSortField): 5; IOException (java.io.IOException): 4; HashSet (java.util.HashSet): 4; Codec (org.apache.lucene.codecs.Codec): 4; Document (org.apache.lucene.document.Document): 4; CorruptIndexException (org.apache.lucene.index.CorruptIndexException): 4; SegmentInfo (org.apache.lucene.index.SegmentInfo): 4; ChecksumIndexInput (org.apache.lucene.store.ChecksumIndexInput): 4; ArrayList (java.util.ArrayList): 3; HashMap (java.util.HashMap): 3; Collectors (java.util.stream.Collectors): 3; IntStream (java.util.stream.IntStream): 3; Store (org.apache.lucene.document.Field.Store): 3; TextField (org.apache.lucene.document.TextField): 3; DirectoryReader (org.apache.lucene.index.DirectoryReader): 3