Search in sources :

Example 6 with StreamSource

use of org.opengrok.indexer.analysis.StreamSource in project OpenGrok by OpenGrok.

the class LineBreakerTest method shouldSplitEndingLFsIntoOneMoreLine.

/**
 * Resets the breaker with a two-line text whose last character is LF and
 * checks the reported count (3) together with the offsets at indexes 0-2.
 */
@Test
public void shouldSplitEndingLFsIntoOneMoreLine() throws IOException {
    // Expected offset for each queried index, in index order.
    final int[] expectedOffsets = {0, 4, 8};

    brkr.reset(StreamSource.fromString("abc\ndef\n"));

    assertEquals(3, brkr.count(), "split count");
    for (int idx = 0; idx < expectedOffsets.length; idx++) {
        assertEquals(expectedOffsets[idx], brkr.getOffset(idx), "split offset");
    }
}
Also used : StreamSource(org.opengrok.indexer.analysis.StreamSource) Test(org.junit.jupiter.api.Test)

Example 7 with StreamSource

use of org.opengrok.indexer.analysis.StreamSource in project OpenGrok by OpenGrok.

the class LineBreakerTest method shouldHandleInterspersedLineEndings.

/**
 * Resets the breaker with a text that mixes lone CR, lone LF and CRLF
 * terminators, then checks the reported count (11) and every offset.
 */
@Test
public void shouldHandleInterspersedLineEndings() throws IOException {
    final String mixedEndings = "a\rb\nc\r\nd\r\r\r\n\re\n\rf\r\nghij";

    /*
     * Expected offsets for indexes 0 through 11. Index 11 is queried as
     * well, even though the expected count is 11; its value, 23, is the
     * length of the input text.
     */
    final int[] expectedOffsets = {0, 2, 4, 7, 9, 10, 12, 13, 15, 16, 19, 23};

    brkr.reset(StreamSource.fromString(mixedEndings));

    assertEquals(11, brkr.count(), "split count");
    for (int idx = 0; idx < expectedOffsets.length; idx++) {
        assertEquals(expectedOffsets[idx], brkr.getOffset(idx), "split offset");
    }
}
Also used : StreamSource(org.opengrok.indexer.analysis.StreamSource) Test(org.junit.jupiter.api.Test)

Example 8 with StreamSource

use of org.opengrok.indexer.analysis.StreamSource in project OpenGrok by OpenGrok.

the class LineBreakerTest method shouldHandleDocsOfLongerLength.

/**
 * Resets the breaker with five CRLF-separated segments (the last one
 * unterminated) and checks the count, the offsets at indexes 0-4, and the
 * index that {@code findLineIndex} reports for positions 19, 20 and 21.
 */
@Test
public void shouldHandleDocsOfLongerLength() throws IOException {
    final String crlfText = "ab\r\ncde\r\nefgh\r\nijk\r\nlm";

    // Expected offset for each queried index, in index order.
    final int[] expectedOffsets = {0, 4, 9, 15, 20};

    brkr.reset(StreamSource.fromString(crlfText));

    assertEquals(5, brkr.count(), "split count");
    for (int idx = 0; idx < expectedOffsets.length; idx++) {
        assertEquals(expectedOffsets[idx], brkr.getOffset(idx), "split offset");
    }

    // Position 19 is the LF ending the fourth segment; 20 and 21 are "l" and "m".
    assertEquals(3, brkr.findLineIndex(19), "split find-index");
    assertEquals(4, brkr.findLineIndex(20), "split find-index");
    assertEquals(4, brkr.findLineIndex(21), "split find-index");
}
Also used : StreamSource(org.opengrok.indexer.analysis.StreamSource) Test(org.junit.jupiter.api.Test)

Example 9 with StreamSource

use of org.opengrok.indexer.analysis.StreamSource in project OpenGrok by OpenGrok.

the class SourceSplitterTest method shouldHandleInterspersedLineEndings.

/**
 * Resets a fresh {@code SourceSplitter} with a text that mixes lone CR,
 * lone LF and CRLF terminators, then checks the reported count (11) and
 * every offset.
 */
@Test
public void shouldHandleInterspersedLineEndings() throws IOException {
    final String mixedEndings = "a\rb\nc\r\nd\r\r\r\n\re\n\rf\r\nghij";

    /*
     * Expected offsets for indexes 0 through 11. Index 11 is queried as
     * well, even though the expected count is 11; its value, 23, is the
     * length of the input text.
     */
    final int[] expectedOffsets = {0, 2, 4, 7, 9, 10, 12, 13, 15, 16, 19, 23};

    SourceSplitter splitter = new SourceSplitter();
    splitter.reset(StreamSource.fromString(mixedEndings));

    assertEquals(11, splitter.count(), "split count");
    for (int idx = 0; idx < expectedOffsets.length; idx++) {
        assertEquals(expectedOffsets[idx], splitter.getOffset(idx), "split offset");
    }
}
Also used : StreamSource(org.opengrok.indexer.analysis.StreamSource) Test(org.junit.jupiter.api.Test)

Example 10 with StreamSource

use of org.opengrok.indexer.analysis.StreamSource in project OpenGrok by OpenGrok.

the class StreamUtils method readTagsFromResource.

/**
 * Reads a tags file from the classpath and feeds it, line by line, to a
 * {@link CtagsReader}, returning the definitions the reader produced.
 *
 * @param tagsResourceName classpath name of the tags resource; the method
 * asserts that the resource exists
 * @param rawResourceName optional classpath name of the raw source that the
 * tags describe; when non-null, a splitter supplier backed by that resource
 * is registered on the reader
 * @param tabSize tab size passed to the reader
 * @return the definitions obtained from the reader after all lines were read
 * @throws IOException if reading or closing the tags resource fails
 */
public static Definitions readTagsFromResource(String tagsResourceName, String rawResourceName, int tabSize) throws IOException {
    // try-with-resources accepts a null resource, so the existence assertion
    // can safely run inside the block before anything dereferences the stream.
    try (InputStream res = StreamUtils.class.getClassLoader().getResourceAsStream(tagsResourceName)) {
        assertNotNull(res, tagsResourceName + " as resource");
        try (BufferedReader in = new BufferedReader(new InputStreamReader(res, StandardCharsets.UTF_8))) {
            CtagsReader rdr = new CtagsReader();
            rdr.setTabSize(tabSize);
            if (rawResourceName != null) {
                rdr.setSplitterSupplier(() -> {
                    /*
                     * This should return truly raw content, as the CtagsReader will
                     * expand tabs according to its setting.
                     */
                    SourceSplitter splitter = new SourceSplitter();
                    StreamSource src = sourceFromEmbedded(rawResourceName);
                    try {
                        splitter.reset(src);
                    } catch (IOException ex) {
                        // Best effort: report the failure and signal "no splitter".
                        System.err.println(ex.toString());
                        return null;
                    }
                    return splitter;
                });
            }
            String line;
            while ((line = in.readLine()) != null) {
                rdr.readLine(line);
            }
            return rdr.getDefinitions();
        }
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) BufferedInputStream(java.io.BufferedInputStream) InputStream(java.io.InputStream) StreamSource(org.opengrok.indexer.analysis.StreamSource) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) CtagsReader(org.opengrok.indexer.analysis.CtagsReader)

Aggregations

StreamSource (org.opengrok.indexer.analysis.StreamSource): 16, Test (org.junit.jupiter.api.Test): 8, InputStream (java.io.InputStream): 7, BufferedInputStream (java.io.BufferedInputStream): 4, IOException (java.io.IOException): 4, BufferedReader (java.io.BufferedReader): 2, File (java.io.File): 2, Reader (java.io.Reader): 2, Field (org.apache.lucene.document.Field): 2, AbstractAnalyzer (org.opengrok.indexer.analysis.AbstractAnalyzer): 2, ExpandTabsReader (org.opengrok.indexer.analysis.ExpandTabsReader): 2, ByteArrayInputStream (java.io.ByteArrayInputStream): 1, InputStreamReader (java.io.InputStreamReader): 1, StringWriter (java.io.StringWriter): 1, GZIPInputStream (java.util.zip.GZIPInputStream): 1, CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute): 1, OffsetAttribute (org.apache.lucene.analysis.tokenattributes.OffsetAttribute): 1, Document (org.apache.lucene.document.Document): 1, BytesRef (org.apache.lucene.util.BytesRef): 1, CBZip2InputStream (org.apache.tools.bzip2.CBZip2InputStream): 1