Search in sources:

Example 6 with NumLinesLOC

use of org.opengrok.indexer.analysis.NumLinesLOC in project OpenGrok by OpenGrok.

The class TroffAnalyzer, method analyze.

@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException {
    /*
     * Explicitly use this analyzer's own token stream for the FULL field to
     * work around #1376 (symbols search behaving like full text search).
     */
    JFlexTokenizer tokenizer = symbolTokenizerFactory.get();
    tokenizer.setReader(getReader(src.getStream()));
    doc.add(new OGKTextField(QueryBuilder.FULL, tokenizer));

    if (xrefOut == null) {
        // No xref requested; nothing more to do.
        return;
    }
    // Open a second reader over the source to produce the xref and line counts.
    try (Reader in = getReader(src.getStream())) {
        WriteXrefArgs xrefArgs = new WriteXrefArgs(in, xrefOut);
        xrefArgs.setProject(project);
        Xrefer xrefer = writeXref(xrefArgs);
        String storedPath = doc.get(QueryBuilder.PATH);
        addNumLinesLOC(doc, new NumLinesLOC(storedPath, xrefer.getLineNumber(),
                xrefer.getLOC()));
    }
}
Also used : JFlexTokenizer(org.opengrok.indexer.analysis.JFlexTokenizer) OGKTextField(org.opengrok.indexer.analysis.OGKTextField) NumLinesLOC(org.opengrok.indexer.analysis.NumLinesLOC) Xrefer(org.opengrok.indexer.analysis.Xrefer) Reader(java.io.Reader) WriteXrefArgs(org.opengrok.indexer.analysis.WriteXrefArgs)

Example 7 with NumLinesLOC

use of org.opengrok.indexer.analysis.NumLinesLOC in project OpenGrok by OpenGrok.

The class IndexDatabase, method removeFile.

/**
 * Remove a stale file (uidIter.term().text()) from the index database and
 * history cache, and queue the removal of xref.
 * <p>
 * If per-file #Lines/LOC deltas are being tracked ({@code isCountingDeltas}),
 * the file's stored counts are negated and re-registered so the aggregate
 * directory counts are reverted before the document is deleted.
 *
 * @param removeHistory if false, do not remove history cache for this file
 * @throws java.io.IOException if an error occurs
 */
private void removeFile(boolean removeHistory) throws IOException {
    // Decode the source path back out of the current uid term.
    String path = Util.uid2url(uidIter.term().utf8ToString());
    // Tell listeners the removal is starting (fileRemoved() fires at the end).
    for (IndexChangedListener listener : listeners) {
        listener.fileRemove(path);
    }
    // Determine if a reversal of counts is necessary, and execute if so.
    if (isCountingDeltas) {
        postsIter = uidIter.postings(postsIter);
        while (postsIter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            // Read a limited-fields version of the document.
            Document doc = reader.document(postsIter.docID(), REVERT_COUNTS_FIELDS);
            if (doc != null) {
                NullableNumLinesLOC nullableCounts = NumLinesLOCUtil.read(doc);
                if (nullableCounts.getNumLines() != null && nullableCounts.getLOC() != null) {
                    // Register negated counts so aggregation subtracts this
                    // file's contribution from its ancestor directories.
                    NumLinesLOC counts = new NumLinesLOC(path, -nullableCounts.getNumLines(), -nullableCounts.getLOC());
                    countsAggregator.register(counts);
                }
                // Only the first document found for this uid is relevant.
                break;
            }
        }
    }
    // Delete the Lucene document, then queue xref (and optionally history) cleanup.
    writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term()));
    removeXrefFile(path);
    if (removeHistory) {
        removeHistoryFile(path);
    }
    setDirty();
    // Notify listeners that the removal has completed.
    for (IndexChangedListener listener : listeners) {
        listener.fileRemoved(path);
    }
}
Also used : NullableNumLinesLOC(org.opengrok.indexer.analysis.NullableNumLinesLOC) NumLinesLOC(org.opengrok.indexer.analysis.NumLinesLOC) NullableNumLinesLOC(org.opengrok.indexer.analysis.NullableNumLinesLOC) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document)

Example 8 with NumLinesLOC

use of org.opengrok.indexer.analysis.NumLinesLOC in project OpenGrok by OpenGrok.

The class IndexDatabase, method update.

/**
 * Update the content of this index database.
 * <p>
 * Opens the index writer, traverses each configured directory to determine
 * the work list, indexes the files in parallel, removes stale documents,
 * and stores aggregated #Lines/LOC directory counts. Only one update may
 * run at a time; a concurrent call fails fast.
 *
 * @throws IOException if an error occurs, or if an update is already running
 */
public void update() throws IOException {
    // Guard against concurrent updates of the same database.
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }
    RuntimeEnvironment env = RuntimeEnvironment.getInstance();
    // Reset per-run state left over from any previous update.
    reader = null;
    writer = null;
    settings = null;
    uidIter = null;
    postsIter = null;
    indexedSymlinks.clear();
    // Deferred so that cleanup in the finally block still runs before rethrow.
    IOException finishingException = null;
    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(env.getRamBufferSize());
        writer = new IndexWriter(indexDirectory, iwc);
        // to make sure index exists on the disk
        writer.commit();
        completer = new PendingFileCompleter();
        // Default to the whole source root (or the project's path) when no
        // explicit directories were requested.
        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }
        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = env.getSourceRootFile();
            } else {
                sourceRoot = new File(env.getSourceRootFile(), dir);
            }
            dir = Util.fixPathIfWindows(dir);
            // First uid for this directory; used to seek the uid terms enum.
            String startuid = Util.path2uid(dir, "");
            // open existing index
            reader = DirectoryReader.open(indexDirectory);
            countsAggregator = new NumLinesLOCAggregator();
            settings = readAnalysisSettings();
            if (settings == null) {
                settings = new IndexAnalysisSettings3();
            }
            Terms terms = null;
            if (reader.numDocs() > 0) {
                terms = MultiTerms.getTerms(reader, QueryBuilder.U);
                // Decide whether #Lines/LOC can be maintained incrementally
                // (deltas mode) or must be recomputed from scratch.
                NumLinesLOCAccessor countsAccessor = new NumLinesLOCAccessor();
                if (countsAccessor.hasStored(reader)) {
                    isWithDirectoryCounts = true;
                    isCountingDeltas = true;
                } else {
                    boolean foundCounts = countsAccessor.register(countsAggregator, reader);
                    isWithDirectoryCounts = false;
                    isCountingDeltas = foundCounts;
                    if (!isCountingDeltas) {
                        LOGGER.info("Forcing reindexing to fully compute directory counts");
                    }
                }
            } else {
                // Empty index: nothing stored yet, so no deltas possible.
                isWithDirectoryCounts = false;
                isCountingDeltas = false;
            }
            try {
                if (terms != null) {
                    uidIter = terms.iterator();
                    // init uid
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid));
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        LOGGER.log(Level.WARNING, "Couldn''t find a start term for {0}, empty u field?", startuid);
                    }
                }
                // The actual indexing happens in indexParallel().
                IndexDownArgs args = new IndexDownArgs();
                Statistics elapsed = new Statistics();
                LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir);
                indexDown(sourceRoot, dir, args);
                elapsed.report(LOGGER, String.format("Done traversal of directory %s", dir), "indexer.db.directory.traversal");
                showFileCount(dir, args);
                args.cur_count = 0;
                elapsed = new Statistics();
                LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir);
                indexParallel(dir, args);
                elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir), "indexer.db.directory.index");
                // Any uid terms still remaining under this directory prefix
                // correspond to files that were presumably removed from the
                // source tree — they were not re-visited by the traversal and
                // have higher ordering than any present files. Remove them.
                while (uidIter != null && uidIter.term() != null && uidIter.term().utf8ToString().startsWith(startuid)) {
                    removeFile(true);
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }
                /*
                     * As a signifier that #Lines/LOC are comprehensively
                     * stored so that later calculation is in deltas mode, we
                     * need at least one D-document saved. For a repo with only
                     * non-code files, however, no true #Lines/LOC will have
                     * been saved. Subsequent re-indexing will do more work
                     * than necessary (until a source code file is placed). We
                     * can record zeroes for a fake file under the root to get
                     * a D-document even for this special repo situation.
                     *
                     * Metrics are aggregated for directories up to the root,
                     * so it suffices to put the fake directly under the root.
                     */
                if (!isWithDirectoryCounts) {
                    final String ROOT_FAKE_FILE = "/.OpenGrok_fake_file";
                    countsAggregator.register(new NumLinesLOC(ROOT_FAKE_FILE, 0, 0));
                }
                NumLinesLOCAccessor countsAccessor = new NumLinesLOCAccessor();
                countsAccessor.store(writer, reader, countsAggregator, isWithDirectoryCounts && isCountingDeltas);
                markProjectIndexed(project);
            } finally {
                reader.close();
            }
        }
        // This is deliberate.
        try {
            finishWriting();
        } catch (IOException e) {
            // Remember the failure; cleanup below must still run first.
            finishingException = e;
        }
    } catch (RuntimeException ex) {
        LOGGER.log(Level.SEVERE, "Failed with unexpected RuntimeException", ex);
        throw ex;
    } finally {
        completer = null;
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            // Keep the first failure; a close error must not mask it.
            if (finishingException == null) {
                finishingException = e;
            }
            LOGGER.log(Level.WARNING, "An error occurred while closing writer", e);
        } finally {
            writer = null;
            // Release the single-update guard taken at the top.
            synchronized (lock) {
                running = false;
            }
        }
    }
    if (finishingException != null) {
        throw finishingException;
    }
    // Optionally optimize and stamp the index only after a clean, dirty run.
    if (!isInterrupted() && isDirty()) {
        if (env.isOptimizeDatabase()) {
            optimize();
        }
        env.setIndexTimestamp();
    }
}
Also used : RuntimeEnvironment(org.opengrok.indexer.configuration.RuntimeEnvironment) NumLinesLOC(org.opengrok.indexer.analysis.NumLinesLOC) NullableNumLinesLOC(org.opengrok.indexer.analysis.NullableNumLinesLOC) Terms(org.apache.lucene.index.Terms) MultiTerms(org.apache.lucene.index.MultiTerms) IOException(java.io.IOException) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) AbstractAnalyzer(org.opengrok.indexer.analysis.AbstractAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Statistics(org.opengrok.indexer.util.Statistics) TermsEnum(org.apache.lucene.index.TermsEnum) IndexWriter(org.apache.lucene.index.IndexWriter) File(java.io.File) BytesRef(org.apache.lucene.util.BytesRef) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 9 with NumLinesLOC

use of org.opengrok.indexer.analysis.NumLinesLOC in project OpenGrok by OpenGrok.

The class NumLinesLOCAccessor, method processFileCounts.

/**
 * Register #Lines/LOC for every hit whose stored document has both counts
 * defined.
 *
 * @param countsAggregator sink for the per-file counts
 * @param searcher searcher used to resolve the hit documents
 * @param hits the documents to inspect
 * @return {@code true} if at least one document had defined counts
 * @throws IOException if a document cannot be read
 */
private boolean processFileCounts(NumLinesLOCAggregator countsAggregator, IndexSearcher searcher, TopDocs hits) throws IOException {
    boolean hasDefinedNumLines = false;
    for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document storedDoc = searcher.doc(scoreDoc.doc);
        NullableNumLinesLOC nullable = NumLinesLOCUtil.read(storedDoc);
        // Skip documents missing either count; both are needed to aggregate.
        if (nullable.getNumLines() == null || nullable.getLOC() == null) {
            continue;
        }
        countsAggregator.register(new NumLinesLOC(nullable.getPath(),
                nullable.getNumLines(), nullable.getLOC()));
        hasDefinedNumLines = true;
    }
    return hasDefinedNumLines;
}
Also used : NullableNumLinesLOC(org.opengrok.indexer.analysis.NullableNumLinesLOC) NumLinesLOC(org.opengrok.indexer.analysis.NumLinesLOC) NullableNumLinesLOC(org.opengrok.indexer.analysis.NullableNumLinesLOC) AccumulatedNumLinesLOC(org.opengrok.indexer.analysis.AccumulatedNumLinesLOC) Document(org.apache.lucene.document.Document) ScoreDoc(org.apache.lucene.search.ScoreDoc)

Aggregations

NumLinesLOC (org.opengrok.indexer.analysis.NumLinesLOC)9 Reader (java.io.Reader)4 OGKTextField (org.opengrok.indexer.analysis.OGKTextField)4 WriteXrefArgs (org.opengrok.indexer.analysis.WriteXrefArgs)4 Xrefer (org.opengrok.indexer.analysis.Xrefer)4 AccumulatedNumLinesLOC (org.opengrok.indexer.analysis.AccumulatedNumLinesLOC)3 JFlexTokenizer (org.opengrok.indexer.analysis.JFlexTokenizer)3 NullableNumLinesLOC (org.opengrok.indexer.analysis.NullableNumLinesLOC)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 Document (org.apache.lucene.document.Document)2 Test (org.junit.jupiter.api.Test)2 RuntimeEnvironment (org.opengrok.indexer.configuration.RuntimeEnvironment)2 File (java.io.File)1 Comparator (java.util.Comparator)1 List (java.util.List)1 ExecutionException (java.util.concurrent.ExecutionException)1 Analyzer (org.apache.lucene.analysis.Analyzer)1 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)1 StoredField (org.apache.lucene.document.StoredField)1