Search in sources:

Example 1 with HistoryException

use of org.opengrok.indexer.history.HistoryException in project OpenGrok by OpenGrok.

In class AnalyzerGuru, the method populateDocument:

/**
 * Populate a Lucene document with the required fields.
 *
 * @param doc The document to populate
 * @param file The file to index
 * @param path Where the file is located (from source root)
 * @param fa The analyzer to use on the file
 * @param xrefOut Where to write the xref (possibly {@code null})
 * @throws IOException If an exception occurs while collecting the data
 * @throws InterruptedException if a timeout occurs
 */
public void populateDocument(Document doc, File file, String path, AbstractAnalyzer fa, Writer xrefOut) throws IOException, InterruptedException {
    String date = DateTools.timeToString(file.lastModified(), DateTools.Resolution.MILLISECOND);
    path = Util.fixPathIfWindows(path);
    doc.add(new Field(QueryBuilder.U, Util.path2uid(path, date), string_ft_stored_nanalyzed_norms));
    doc.add(new Field(QueryBuilder.FULLPATH, file.getAbsolutePath(), string_ft_nstored_nanalyzed_norms));
    doc.add(new SortedDocValuesField(QueryBuilder.FULLPATH, new BytesRef(file.getAbsolutePath())));
    if (RuntimeEnvironment.getInstance().isHistoryEnabled()) {
        try {
            HistoryGuru histGuru = HistoryGuru.getInstance();
            HistoryReader hr = histGuru.getHistoryReader(file);
            if (hr != null) {
                doc.add(new TextField(QueryBuilder.HIST, hr));
                History history;
                if ((history = histGuru.getHistory(file)) != null) {
                    List<HistoryEntry> historyEntries = history.getHistoryEntries(1, 0);
                    if (!historyEntries.isEmpty()) {
                        HistoryEntry histEntry = historyEntries.get(0);
                        doc.add(new TextField(QueryBuilder.LASTREV, histEntry.getRevision(), Store.YES));
                    }
                }
            }
        } catch (HistoryException e) {
            LOGGER.log(Level.WARNING, "An error occurred while reading history: ", e);
        }
    }
    doc.add(new Field(QueryBuilder.DATE, date, string_ft_stored_nanalyzed_norms));
    doc.add(new SortedDocValuesField(QueryBuilder.DATE, new BytesRef(date)));
    // `path' is not null, as it was passed to Util.path2uid() above.
    doc.add(new TextField(QueryBuilder.PATH, path, Store.YES));
    Project project = Project.getProject(path);
    if (project != null) {
        doc.add(new TextField(QueryBuilder.PROJECT, project.getPath(), Store.YES));
    }
    /*
         * Use the parent of the path -- not the absolute file as is done for
         * FULLPATH -- so that DIRPATH is the same convention as for PATH
         * above. A StringField, however, is used instead of a TextField.
         */
    File fpath = new File(path);
    String fileParent = fpath.getParent();
    if (fileParent != null && fileParent.length() > 0) {
        String normalizedPath = QueryBuilder.normalizeDirPath(fileParent);
        StringField npstring = new StringField(QueryBuilder.DIRPATH, normalizedPath, Store.NO);
        doc.add(npstring);
    }
    if (fa != null) {
        AbstractAnalyzer.Genre g = fa.getGenre();
        if (g == AbstractAnalyzer.Genre.PLAIN || g == AbstractAnalyzer.Genre.XREFABLE || g == AbstractAnalyzer.Genre.HTML) {
            doc.add(new Field(QueryBuilder.T, g.typeName(), string_ft_stored_nanalyzed_norms));
        }
        fa.analyze(doc, StreamSource.fromFile(file), xrefOut);
        String type = fa.getFileTypeName();
        doc.add(new StringField(QueryBuilder.TYPE, type, Store.YES));
    }
}
Also used : HistoryException(org.opengrok.indexer.history.HistoryException) History(org.opengrok.indexer.history.History) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) TextField(org.apache.lucene.document.TextField) StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) Project(org.opengrok.indexer.configuration.Project) StringField(org.apache.lucene.document.StringField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) TextField(org.apache.lucene.document.TextField) HistoryEntry(org.opengrok.indexer.history.HistoryEntry) HistoryGuru(org.opengrok.indexer.history.HistoryGuru) File(java.io.File) BytesRef(org.apache.lucene.util.BytesRef) HistoryReader(org.opengrok.indexer.history.HistoryReader)

Example 2 with HistoryException

use of org.opengrok.indexer.history.HistoryException in project OpenGrok by OpenGrok.

In class SearchEngine, the method results:

/**
 * Get results , if no search was started before, no results are returned.
 * This method will requery if {@code end} is more than first query from search,
 * hence performance hit applies, if you want results in later pages than
 * number of cachePages. {@code end} has to be bigger than {@code start} !
 *
 * @param start start of the hit list
 * @param end end of the hit list
 * @param ret list of results from start to end or null/empty if no search
 * was started
 */
public void results(int start, int end, List<Hit> ret) {
    // return if no start search() was done
    if (hits == null || (end < start)) {
        ret.clear();
        return;
    }
    ret.clear();
    // TODO check if below fits for if end=old hits.length, or it should include it
    if (end > hits.length && !allCollected) {
        // do the requery, we want more than 5 pages
        collector = TopScoreDocCollector.create(totalHits, Short.MAX_VALUE);
        try {
            searcher.search(query, collector);
        } catch (Exception e) {
            // this exception should never be hit, since search() will hit this before
            LOGGER.log(Level.WARNING, SEARCH_EXCEPTION_MSG, e);
        }
        hits = collector.topDocs().scoreDocs;
        Document d = null;
        for (int i = start; i < hits.length; i++) {
            int docId = hits[i].doc;
            try {
                d = searcher.doc(docId);
            } catch (Exception e) {
                LOGGER.log(Level.SEVERE, SEARCH_EXCEPTION_MSG, e);
            }
            docs.add(d);
        }
        allCollected = true;
    }
    // the only problem is that count of docs is usually smaller than number of results
    for (int ii = start; ii < end; ++ii) {
        boolean alt = (ii % 2 == 0);
        boolean hasContext = false;
        try {
            Document doc = docs.get(ii);
            String filename = doc.get(QueryBuilder.PATH);
            AbstractAnalyzer.Genre genre = AbstractAnalyzer.Genre.get(doc.get(QueryBuilder.T));
            Definitions tags = null;
            IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
            if (tagsField != null) {
                tags = Definitions.deserialize(tagsField.binaryValue().bytes);
            }
            Scopes scopes = null;
            IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
            if (scopesField != null) {
                scopes = Scopes.deserialize(scopesField.binaryValue().bytes);
            }
            int nhits = docs.size();
            if (sourceContext != null) {
                sourceContext.toggleAlt();
                try {
                    if (AbstractAnalyzer.Genre.PLAIN == genre && (source != null)) {
                        // SRCROOT is read with UTF-8 as a default.
                        hasContext = sourceContext.getContext(new InputStreamReader(new FileInputStream(source + filename), StandardCharsets.UTF_8), null, null, null, filename, tags, nhits > 100, getDefinition() != null, ret, scopes);
                    } else if (AbstractAnalyzer.Genre.XREFABLE == genre && data != null && summarizer != null) {
                        int l;
                        /**
                         * For backward compatibility, read the
                         * OpenGrok-produced document using the system
                         * default charset.
                         */
                        try (Reader r = RuntimeEnvironment.getInstance().isCompressXref() ? new HTMLStripCharFilter(new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(TandemPath.join(data + Prefix.XREF_P + filename, ".gz")))))) : new HTMLStripCharFilter(new BufferedReader(new FileReader(data + Prefix.XREF_P + filename)))) {
                            l = r.read(content);
                        }
                        // TODO FIX below fragmenter according to either summarizer or context
                        // (to get line numbers, might be hard, since xref writers will need to be fixed too,
                        // they generate just one line of html code now :( )
                        Summary sum = summarizer.getSummary(new String(content, 0, l));
                        Fragment[] fragments = sum.getFragments();
                        for (Fragment fragment : fragments) {
                            String match = fragment.toString();
                            if (match.length() > 0) {
                                if (!fragment.isEllipsis()) {
                                    Hit hit = new Hit(filename, fragment.toString(), "", true, alt);
                                    ret.add(hit);
                                }
                                hasContext = true;
                            }
                        }
                    } else {
                        LOGGER.log(Level.WARNING, "Unknown genre: {0} for {1}", new Object[] { genre, filename });
                        hasContext |= sourceContext.getContext(null, null, null, null, filename, tags, false, false, ret, scopes);
                    }
                } catch (FileNotFoundException exp) {
                    LOGGER.log(Level.WARNING, "Couldn''t read summary from {0} ({1})", new Object[] { filename, exp.getMessage() });
                    hasContext |= sourceContext.getContext(null, null, null, null, filename, tags, false, false, ret, scopes);
                }
            }
            if (historyContext != null) {
                hasContext |= historyContext.getContext(source + filename, filename, ret);
            }
            if (!hasContext) {
                ret.add(new Hit(filename, "...", "", false, alt));
            }
        } catch (IOException | ClassNotFoundException | HistoryException e) {
            LOGGER.log(Level.WARNING, SEARCH_EXCEPTION_MSG, e);
        }
    }
}
Also used : FileNotFoundException(java.io.FileNotFoundException) MultiReader(org.apache.lucene.index.MultiReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) Reader(java.io.Reader) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) IndexReader(org.apache.lucene.index.IndexReader) Document(org.apache.lucene.document.Document) Fragment(org.opengrok.indexer.search.Summary.Fragment) GZIPInputStream(java.util.zip.GZIPInputStream) HTMLStripCharFilter(org.apache.lucene.analysis.charfilter.HTMLStripCharFilter) FileReader(java.io.FileReader) InputStreamReader(java.io.InputStreamReader) Definitions(org.opengrok.indexer.analysis.Definitions) HistoryException(org.opengrok.indexer.history.HistoryException) IOException(java.io.IOException) ParseException(org.apache.lucene.queryparser.classic.ParseException) HistoryException(org.opengrok.indexer.history.HistoryException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) FileInputStream(java.io.FileInputStream) IndexableField(org.apache.lucene.index.IndexableField) Scopes(org.opengrok.indexer.analysis.Scopes) AbstractAnalyzer(org.opengrok.indexer.analysis.AbstractAnalyzer) BufferedReader(java.io.BufferedReader)

Aggregations

HistoryException (org.opengrok.indexer.history.HistoryException)2 BufferedReader (java.io.BufferedReader)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 FileNotFoundException (java.io.FileNotFoundException)1 FileReader (java.io.FileReader)1 IOException (java.io.IOException)1 InputStreamReader (java.io.InputStreamReader)1 Reader (java.io.Reader)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 HTMLStripCharFilter (org.apache.lucene.analysis.charfilter.HTMLStripCharFilter)1 Document (org.apache.lucene.document.Document)1 Field (org.apache.lucene.document.Field)1 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)1 StringField (org.apache.lucene.document.StringField)1 TextField (org.apache.lucene.document.TextField)1 DirectoryReader (org.apache.lucene.index.DirectoryReader)1 IndexReader (org.apache.lucene.index.IndexReader)1 IndexableField (org.apache.lucene.index.IndexableField)1 MultiReader (org.apache.lucene.index.MultiReader)1