Search in sources :

Example 6 with StreamSource

use of org.opensolaris.opengrok.analysis.StreamSource in project OpenGrok by OpenGrok.

the class DefinitionsTokenStreamTest method getSourceFromResource.

/**
 * Creates a {@link StreamSource} backed by a class-loader resource.
 * <p>
 * The resource is looked up every time {@code getStream()} is invoked, and
 * the lookup asserts that a stream was actually found.
 *
 * @param name resource name handed to the class loader
 * @return a source that opens the named resource on demand
 */
private static StreamSource getSourceFromResource(String name) {
    return new StreamSource() {

        @Override
        public InputStream getStream() throws IOException {
            ClassLoader loader = getClass().getClassLoader();
            InputStream resourceStream = loader.getResourceAsStream(name);
            // A missing resource fails the test here, with the name in the message.
            assertNotNull(name + " as resource,", resourceStream);
            return resourceStream;
        }
    };
}
Also used : InputStream(java.io.InputStream) StreamSource(org.opensolaris.opengrok.analysis.StreamSource)

Example 7 with StreamSource

use of org.opensolaris.opengrok.analysis.StreamSource in project OpenGrok by OpenGrok.

the class DefinitionsTokenStreamTest method testDefinitionsVsContent.

/**
 * Verifies that the tokens emitted by a {@link DefinitionsTokenStream} agree
 * with the source text found at each token's reported offsets.
 * <p>
 * For every token, the substring of the source between the start and end
 * offsets must end with the token's term, unless {@code overrides} has an
 * entry for that token's 1-based ordinal, in which case the entry's key is
 * compared to the substring and its value to the term.
 *
 * @param expandTabs whether to read the source with a tab size of 8
 * (otherwise 0 is used)
 * @param sourceResource name of the source-code resource
 * @param tagsResource name of the ctags resource
 * @param expectedCount total number of tokens the stream must yield
 * @param doSupplement whether {@code sourceResource} is also passed as the
 * supplement when reading the tags (otherwise {@code null} is passed)
 * @param overrides optional map from token ordinal to an expected
 * (substring, term) pair; may be {@code null}
 * @throws IOException if reading a resource or the token stream fails
 */
private void testDefinitionsVsContent(boolean expandTabs, String sourceResource, String tagsResource, int expectedCount, boolean doSupplement, Map<Integer, SimpleEntry<String, String>> overrides) throws IOException {
    StreamSource src = getSourceFromResource(sourceResource);

    // Load the ctags definitions.
    final int tabSize = expandTabs ? 8 : 0;
    String suppResource = null;
    if (doSupplement) {
        suppResource = sourceResource;
    }
    Definitions defs = StreamUtils.readTagsFromResource(tagsResource, suppResource, tabSize);

    // Slurp the full source text, one character at a time.
    StringBuilder content = new StringBuilder();
    try (Reader rdr = ExpandTabsReader.wrap(IOUtils.createBOMStrippedReader(src.getStream(), StandardCharsets.UTF_8.name()), tabSize)) {
        for (int ch = rdr.read(); ch != -1; ch = rdr.read()) {
            content.append((char) ch);
        }
    }
    String source = content.toString();

    // Set up the token stream over the same source and definitions.
    DefinitionsTokenStream tokstream = new DefinitionsTokenStream();
    tokstream.initialize(defs, src, (in) -> {
        return ExpandTabsReader.wrap(in, tabSize);
    });

    CharTermAttribute term = tokstream.getAttribute(CharTermAttribute.class);
    assertNotNull("CharTermAttribute", term);
    OffsetAttribute offs = tokstream.getAttribute(OffsetAttribute.class);
    assertNotNull("OffsetAttribute", offs);

    // Walk the tokens, checking each against the source text.
    int count = 0;
    while (tokstream.incrementToken()) {
        ++count;
        int start = offs.startOffset();
        int end = offs.endOffset();
        String termValue = term.toString();
        String cutValue = source.substring(start, end);

        if (overrides != null && overrides.containsKey(count)) {
            // An override replaces the default ends-with check for this token.
            SimpleEntry<String, String> expected = overrides.get(count);
            assertEquals("cut term override" + count, expected.getKey(), cutValue);
            assertEquals("cut term w.r.t. term override" + count, expected.getValue(), termValue);
        } else {
            String message = "cut term" + count + " at " + start + "-" + end + "[" + cutValue + "] vs [" + termValue + "]";
            assertTrue(message, cutValue.endsWith(termValue));
        }
    }
    assertEquals("token count", expectedCount, count);
}
Also used : CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) StreamSource(org.opensolaris.opengrok.analysis.StreamSource) Definitions(org.opensolaris.opengrok.analysis.Definitions) OffsetAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute) ExpandTabsReader(org.opensolaris.opengrok.analysis.ExpandTabsReader) Reader(java.io.Reader)

Example 8 with StreamSource

use of org.opensolaris.opengrok.analysis.StreamSource in project OpenGrok by OpenGrok.

the class BZip2Analyzer method analyze.

/**
 * Analyzes a document whose "path" field ends in {@code .bz2}, {@code .BZ2}
 * or {@code .bz} by delegating to the analyzer chosen for the path with that
 * last extension removed.
 * <p>
 * Documents with no "path" field, or with any other ending, are left
 * untouched. If the chosen analyzer is itself a {@code BZip2Analyzer}, the
 * delegate is cleared and nothing further is done. Otherwise this analyzer's
 * genre becomes XREFABLE when the delegate is PLAIN or XREFABLE and DATA in
 * all other cases. After delegation, an existing "t" field is removed and
 * re-added only for the XREFABLE genre.
 *
 * @param doc the document being populated
 * @param src the source of the file content; it is passed through
 * {@code wrap} before use
 * @param xrefOut writer handed on to the delegate analyzer
 * @throws IOException if reading the stream or the delegate analysis fails
 * @throws InterruptedException if the delegate analysis is interrupted
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException, InterruptedException {
    StreamSource bzSrc = wrap(src);
    String path = doc.get("path");
    if (path == null) {
        return;
    }
    boolean hasBzipSuffix = path.endsWith(".bz2") || path.endsWith(".BZ2") || path.endsWith(".bz");
    if (!hasBzipSuffix) {
        return;
    }

    // Pick the delegate from the file name with its last extension removed.
    String newname = path.substring(0, path.lastIndexOf('.'));
    try (InputStream in = bzSrc.getStream()) {
        fa = AnalyzerGuru.getAnalyzer(in, newname);
    }

    if (fa instanceof BZip2Analyzer) {
        // Do not delegate to another BZip2Analyzer.
        fa = null;
        return;
    }

    this.g = (fa.getGenre() == Genre.PLAIN || fa.getGenre() == Genre.XREFABLE)
            ? Genre.XREFABLE
            : Genre.DATA;
    fa.analyze(doc, bzSrc, xrefOut);

    if (doc.get("t") == null) {
        return;
    }
    // Replace any existing "t" field with this analyzer's own genre.
    doc.removeField("t");
    if (g == Genre.XREFABLE) {
        doc.add(new Field("t", g.typeName(), AnalyzerGuru.string_ft_stored_nanalyzed_norms));
    }
}
Also used : Field(org.apache.lucene.document.Field) BufferedInputStream(java.io.BufferedInputStream) CBZip2InputStream(org.apache.tools.bzip2.CBZip2InputStream) InputStream(java.io.InputStream) StreamSource(org.opensolaris.opengrok.analysis.StreamSource)

Example 9 with StreamSource

use of org.opensolaris.opengrok.analysis.StreamSource in project OpenGrok by OpenGrok.

the class LineBreakerTest method shouldSplitDocsWithNoLastLF.

/**
 * A two-line input with no trailing line feed must give two lines, starting
 * at offsets 0 and 5.
 */
@Test
public void shouldSplitDocsWithNoLastLF() throws IOException {
    brkr.reset(StreamSource.fromString("abc\r\ndef"));

    assertEquals("split count", 2, brkr.count());
    // "abc\r\n" is five characters long, so the second line starts at 5.
    assertEquals("split position", 0, brkr.getPosition(0));
    assertEquals("split position", 5, brkr.getPosition(1));
}
Also used : StreamSource(org.opensolaris.opengrok.analysis.StreamSource) Test(org.junit.Test)

Example 10 with StreamSource

use of org.opensolaris.opengrok.analysis.StreamSource in project OpenGrok by OpenGrok.

the class LineBreakerTest method shouldSplitEmptyStringIntoOneLine.

/**
 * An empty input must still count as a single line starting at offset 0.
 */
@Test
public void shouldSplitEmptyStringIntoOneLine() throws IOException {
    brkr.reset(StreamSource.fromString(""));

    assertEquals("split count", 1, brkr.count());
    assertEquals("split position", 0, brkr.getPosition(0));
}
Also used : StreamSource(org.opensolaris.opengrok.analysis.StreamSource) Test(org.junit.Test)

Aggregations

StreamSource (org.opensolaris.opengrok.analysis.StreamSource)12 InputStream (java.io.InputStream)6 Test (org.junit.Test)6 BufferedInputStream (java.io.BufferedInputStream)4 IOException (java.io.IOException)3 Field (org.apache.lucene.document.Field)2 BufferedReader (java.io.BufferedReader)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 InputStreamReader (java.io.InputStreamReader)1 Reader (java.io.Reader)1 StringWriter (java.io.StringWriter)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute)1 OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute)1 Document (org.apache.lucene.document.Document)1 CBZip2InputStream (org.apache.tools.bzip2.CBZip2InputStream)1 Assert.assertArrayEquals (org.junit.Assert.assertArrayEquals)1 Assert.assertEquals (org.junit.Assert.assertEquals)1 CtagsReader (org.opensolaris.opengrok.analysis.CtagsReader)1 Definitions (org.opensolaris.opengrok.analysis.Definitions)1