Search in sources:

Example 11 with StreamSource

use of org.opengrok.indexer.analysis.StreamSource in project OpenGrok by OpenGrok.

the class PageConfig method evaluateMatchOffset.

/**
 * Checks whether the request carries a search-result match offset and, when
 * it does, attempts to translate that offset into a xref fragment identifier
 * (a 1-based line number stored in {@code fragmentIdentifier}).
 * <p>
 * Nothing is done when a fragment identifier is already set, when no
 * non-negative match offset parameter is present, or when the resource is not
 * a regular file. An {@link IOException} raised while locating the line is
 * logged at WARNING level and treated as "not calculated".
 * @return {@code true} if a xref fragment identifier was calculated by the call to this method
 */
public boolean evaluateMatchOffset() {
    if (fragmentIdentifier != null) {
        // An identifier is already present; leave it untouched.
        return false;
    }

    final int offset = getIntParam(QueryParameters.MATCH_OFFSET_PARAM, -1);
    if (offset < 0) {
        return false;
    }

    final File target = getResourceFile();
    if (!target.isFile()) {
        return false;
    }

    final LineBreaker lineBreaker = new LineBreaker();
    final StreamSource content = StreamSource.fromFile(target);
    try {
        lineBreaker.reset(content, in -> ExpandTabsReader.wrap(in, getProject()));
        final int lineIndex = lineBreaker.findLineIndex(offset);
        if (lineIndex < 0) {
            return false;
        }
        // OpenGrok line numbers are 1-based, whereas the found index is 0-based.
        fragmentIdentifier = String.valueOf(lineIndex + 1);
        return true;
    } catch (IOException e) {
        LOGGER.log(Level.WARNING, String.format("Failed to evaluate match offset for %s", target), e);
        return false;
    }
}
Also used : StreamSource(org.opengrok.indexer.analysis.StreamSource) LineBreaker(org.opengrok.indexer.util.LineBreaker) IOException(java.io.IOException) File(java.io.File)

Example 12 with StreamSource

use of org.opengrok.indexer.analysis.StreamSource in project OpenGrok by OpenGrok.

the class BZip2Analyzer method analyze.

/**
 * Analyzes a document whose path carries a bzip2 suffix ({@code .bz2},
 * {@code .BZ2} or {@code .bz}) by delegating to the analyzer that
 * {@link AnalyzerGuru} selects for the wrapped source and the path with its
 * last extension removed.
 * <p>
 * Nothing happens when the document has no path, the path has none of the
 * suffixes above, or the selected analyzer is itself a {@code BZip2Analyzer}.
 * Otherwise this analyzer's genre becomes {@code XREFABLE} when the delegate
 * is {@code PLAIN} or {@code XREFABLE}, and {@code DATA} in every other case.
 * After delegation, an existing {@code QueryBuilder.T} field is removed and
 * re-added with this analyzer's genre only when that genre is
 * {@code XREFABLE}.
 *
 * @param doc the document being populated
 * @param src the source that is passed through {@code wrap} before use
 * @param xrefOut the writer handed to the delegate analyzer
 * @throws IOException if reading the wrapped stream or the delegate fails
 * @throws InterruptedException if the delegate analysis is interrupted
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException, InterruptedException {
    StreamSource bzSrc = wrap(src);
    String path = doc.get(QueryBuilder.PATH);

    boolean hasBzipSuffix = path != null
            && (path.endsWith(".bz2") || path.endsWith(".BZ2") || path.endsWith(".bz"));
    if (!hasBzipSuffix) {
        return;
    }

    // Drop the trailing extension so the delegate is chosen by the inner file name.
    String innerName = path.substring(0, path.lastIndexOf('.'));
    AbstractAnalyzer delegate;
    try (InputStream probe = bzSrc.getStream()) {
        delegate = AnalyzerGuru.getAnalyzer(probe, innerName);
    }
    if (delegate instanceof BZip2Analyzer) {
        return;
    }

    Genre delegateGenre = delegate.getGenre();
    boolean textual = delegateGenre == Genre.PLAIN || delegateGenre == Genre.XREFABLE;
    this.g = textual ? Genre.XREFABLE : Genre.DATA;

    delegate.analyze(doc, bzSrc, xrefOut);

    if (doc.get(QueryBuilder.T) == null) {
        return;
    }
    // Replace the delegate's type field; it is only restored for XREFABLE content.
    doc.removeField(QueryBuilder.T);
    if (g == Genre.XREFABLE) {
        doc.add(new Field(QueryBuilder.T, g.typeName(), AnalyzerGuru.string_ft_stored_nanalyzed_norms));
    }
}
Also used : Field(org.apache.lucene.document.Field) BufferedInputStream(java.io.BufferedInputStream) CBZip2InputStream(org.apache.tools.bzip2.CBZip2InputStream) InputStream(java.io.InputStream) AbstractAnalyzer(org.opengrok.indexer.analysis.AbstractAnalyzer) StreamSource(org.opengrok.indexer.analysis.StreamSource)

Example 13 with StreamSource

use of org.opengrok.indexer.analysis.StreamSource in project OpenGrok by OpenGrok.

the class OGKUnifiedHighlighter method getRepoFileContent.

/**
 * Reads the full text of a file under the source root, but only when the
 * file's current UID (derived from its path and last-modified time) matches
 * the UID that was stored for it.
 *
 * @param repoRelPath the file path relative to the source root
 * @param storedU the stored UID value to compare against; may be {@code null}
 * @return the file content as read through {@code getReader}, or {@code null}
 * if {@code storedU} is {@code null}, the file does not exist, or the UIDs differ
 * @throws IOException if opening or reading the file fails
 */
private String getRepoFileContent(String repoRelPath, String storedU) throws IOException {
    if (storedU == null) {
        LOGGER.log(Level.FINE, "Missing U[UID] for: {0}", repoRelPath);
        return null;
    }

    String repoAbsPath = env.getSourceRootPath() + repoRelPath;
    File sourceFile = new File(repoAbsPath);
    if (!sourceFile.exists()) {
        LOGGER.log(Level.FINE, "Missing file: {0}", repoAbsPath);
        return null;
    }

    repoRelPath = Util.fixPathIfWindows(repoRelPath);

    // Rebuild the UID from the file's present timestamp; a mismatch with the
    // stored value means the timestamp (U) has changed.
    String modifiedStamp = DateTools.timeToString(sourceFile.lastModified(), DateTools.Resolution.MILLISECOND);
    BytesRef currentUid = new BytesRef(Util.path2uid(repoRelPath, modifiedStamp));
    BytesRef expectedUid = new BytesRef(storedU);
    if (expectedUid.compareTo(currentUid) != 0) {
        LOGGER.log(Level.FINE, "Last-modified differs for: {0}", repoRelPath);
        return null;
    }

    StreamSource content = StreamSource.fromFile(sourceFile);
    StringBuilder text = new StringBuilder();
    try (InputStream raw = content.getStream();
        Reader chars = getReader(raw)) {
        for (int ch = chars.read(); ch != -1; ch = chars.read()) {
            text.append((char) ch);
        }
    }
    return text.toString();
}
Also used : InputStream(java.io.InputStream) StreamSource(org.opengrok.indexer.analysis.StreamSource) Reader(java.io.Reader) BufferedReader(java.io.BufferedReader) ExpandTabsReader(org.opengrok.indexer.analysis.ExpandTabsReader) File(java.io.File) BytesRef(org.apache.lucene.util.BytesRef)

Example 14 with StreamSource

use of org.opengrok.indexer.analysis.StreamSource in project OpenGrok by OpenGrok.

the class DefinitionsTokenStreamTest method testDefinitionsVsContent.

/**
 * Verifies that every token emitted by a {@code DefinitionsTokenStream}
 * lines up with the source text: the substring cut from the source at the
 * token's offsets must end with the token's term, and the total number of
 * tokens must equal {@code expectedCount}.
 *
 * @param expandTabs whether to read with a tab size of 8 (otherwise 0)
 * @param sourceResource the name of the resource holding the source text
 * @param tagsResource the name of the resource holding the ctags to read
 * @param expectedCount the number of tokens the stream is expected to emit
 * @param doSupplement whether {@code sourceResource} is also passed as the
 * supplement resource when reading the tags (otherwise {@code null} is passed)
 * @param overrides optional map, keyed by 1-based token ordinal, whose entry
 * key is the expected cut text and entry value the expected term; may be
 * {@code null}
 * @throws IOException if reading the source or the token stream fails
 */
// DefinitionsTokenStream should not be used in try-with-resources
@SuppressWarnings("java:S2095")
private void testDefinitionsVsContent(boolean expandTabs, String sourceResource, String tagsResource, int expectedCount, boolean doSupplement, Map<Integer, SimpleEntry<String, String>> overrides) throws IOException {
    StreamSource src = getSourceFromResource(sourceResource);

    // Load the ctags definitions.
    int tabSize = expandTabs ? 8 : 0;
    String supplement = doSupplement ? sourceResource : null;
    Definitions defs = StreamUtils.readTagsFromResource(tagsResource, supplement, tabSize);

    // Slurp the complete source text, BOM-stripped and tab-expanded.
    StringBuilder text = new StringBuilder();
    try (Reader chars = ExpandTabsReader.wrap(IOUtils.createBOMStrippedReader(src.getStream(), StandardCharsets.UTF_8.name()), tabSize)) {
        for (int ch = chars.read(); ch != -1; ch = chars.read()) {
            text.append((char) ch);
        }
    }
    String source = text.toString();

    // Set up the token stream over the same source.
    DefinitionsTokenStream tokens = new DefinitionsTokenStream();
    tokens.initialize(defs, src, in -> ExpandTabsReader.wrap(in, tabSize));

    CharTermAttribute term = tokens.getAttribute(CharTermAttribute.class);
    assertNotNull(term, "CharTermAttribute");
    OffsetAttribute offs = tokens.getAttribute(OffsetAttribute.class);
    assertNotNull(offs, "OffsetAttribute");

    // Walk every token and compare it with the text at its offsets.
    int count = 0;
    while (tokens.incrementToken()) {
        count++;
        String termValue = term.toString();
        String cutValue = source.substring(offs.startOffset(), offs.endOffset());

        boolean overridden = overrides != null && overrides.containsKey(count);
        if (overridden) {
            // An override pins both the cut text and the term for this ordinal.
            SimpleEntry<String, String> expected = overrides.get(count);
            assertEquals(expected.getKey(), cutValue, "cut term override" + count);
            assertEquals(expected.getValue(), termValue, "cut term w.r.t. term override" + count);
        } else {
            assertTrue(cutValue.endsWith(termValue), "cut term" + count + " at " + (offs.startOffset()) + "-" + (offs.endOffset()) + "[" + cutValue + "] vs [" + termValue + "]");
        }
    }
    assertEquals(expectedCount, count, "token count");
}
Also used : CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) StreamSource(org.opengrok.indexer.analysis.StreamSource) Definitions(org.opengrok.indexer.analysis.Definitions) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) Reader(java.io.Reader) ExpandTabsReader(org.opengrok.indexer.analysis.ExpandTabsReader)

Example 15 with StreamSource

use of org.opengrok.indexer.analysis.StreamSource in project OpenGrok by OpenGrok.

the class DefinitionsTokenStreamTest method getSourceFromResource.

/**
 * Creates a {@link StreamSource} backed by a class-path resource.
 *
 * @param name the resource name passed to the class loader
 * @return a source whose {@code getStream()} looks the resource up on each
 * call and asserts that it was found
 */
private static StreamSource getSourceFromResource(String name) {
    return new StreamSource() {

        @Override
        public InputStream getStream() throws IOException {
            final ClassLoader loader = getClass().getClassLoader();
            final InputStream resourceStream = loader.getResourceAsStream(name);
            // Fail the test right away if the resource is absent.
            assertNotNull(resourceStream, name + " as resource,");
            return resourceStream;
        }
    };
}
Also used : InputStream(java.io.InputStream) StreamSource(org.opengrok.indexer.analysis.StreamSource)

Aggregations

StreamSource (org.opengrok.indexer.analysis.StreamSource)16 Test (org.junit.jupiter.api.Test)8 InputStream (java.io.InputStream)7 BufferedInputStream (java.io.BufferedInputStream)4 IOException (java.io.IOException)4 BufferedReader (java.io.BufferedReader)2 File (java.io.File)2 Reader (java.io.Reader)2 Field (org.apache.lucene.document.Field)2 AbstractAnalyzer (org.opengrok.indexer.analysis.AbstractAnalyzer)2 ExpandTabsReader (org.opengrok.indexer.analysis.ExpandTabsReader)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 InputStreamReader (java.io.InputStreamReader)1 StringWriter (java.io.StringWriter)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute)1 OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute)1 Document (org.apache.lucene.document.Document)1 BytesRef (org.apache.lucene.util.BytesRef)1 CBZip2InputStream (org.apache.tools.bzip2.CBZip2InputStream)1