Use of org.opengrok.indexer.analysis.StreamSource in the project OpenGrok by OpenGrok:
the class StreamUtils, method sourceFromEmbedded.
/**
 * Creates a {@code StreamSource} instance that reads data from an
 * embedded (classpath) resource.
 * @param resourceName a required resource name
 * @return a stream source that reads from {@code resourceName}
 */
public static StreamSource sourceFromEmbedded(String resourceName) {
    return new StreamSource() {
        @Override
        public InputStream getStream() {
            ClassLoader loader = StreamUtils.class.getClassLoader();
            InputStream resource = loader.getResourceAsStream(resourceName);
            // Fail the calling test loudly if the resource is missing.
            assertNotNull(resource, "resource " + resourceName);
            return new BufferedInputStream(resource);
        }
    };
}
Use of org.opengrok.indexer.analysis.StreamSource in the project OpenGrok by OpenGrok:
the class SourceSplitterTest, method shouldHandleStreamedDocsOfLongerLength.
/**
 * Verifies splitting of a multi-line CRLF document: line count, the
 * offset of every line start (plus the end offset), and
 * {@code findLineIndex()} for every character position.
 */
@Test
public void shouldHandleStreamedDocsOfLongerLength() throws IOException {
    //                            0         0
    //                            0-- - 5-- - -1--- - 5--- - 2-
    final String INPUT = "ab\r\ncde\r\nefgh\r\nijk\r\nlm";
    StreamSource src = StreamSource.fromString(INPUT);
    SourceSplitter splitter = new SourceSplitter();
    splitter.reset(src);
    assertEquals(5, splitter.count(), "split count");
    assertEquals(0, splitter.getOffset(0), "split offset");
    assertEquals(4, splitter.getOffset(1), "split offset");
    assertEquals(9, splitter.getOffset(2), "split offset");
    assertEquals(15, splitter.getOffset(3), "split offset");
    assertEquals(20, splitter.getOffset(4), "split offset");
    assertEquals(22, splitter.getOffset(5), "split offset");
    /*
     * Test findLineIndex() for every character with an alternate,
     * incremental computation that counts every LF. Before index i is
     * processed, numLF holds the count of LFs strictly before i. A
     * non-LF character at i lies on line numLF; an LF at i terminates
     * line numLF. Either way the expected index is the pre-increment
     * counter, avoiding the original O(n^2) substring re-scan.
     */
    long numLF = 0;
    for (int i = 0; i < splitter.originalLength(); ++i) {
        assertEquals(numLF, splitter.findLineIndex(i), "split find-index of " + i);
        if (INPUT.charAt(i) == '\n') {
            ++numLF;
        }
    }
}
Use of org.opengrok.indexer.analysis.StreamSource in the project OpenGrok by OpenGrok:
the class LineBreakerTest, method shouldSplitDocsWithNoLastLF.
/**
 * Verifies that a document whose final line lacks a terminating LF is
 * still split into the correct number of lines with correct offsets.
 */
@Test
public void shouldSplitDocsWithNoLastLF() throws IOException {
    // Two CRLF-separated lines with no trailing line terminator.
    StreamSource src = StreamSource.fromString("abc\r\ndef");
    brkr.reset(src);
    assertEquals(2, brkr.count(), "split count");
    // Line-start offsets, plus the end-of-document offset.
    int[] expectedOffsets = {0, 5, 8};
    for (int i = 0; i < expectedOffsets.length; ++i) {
        assertEquals(expectedOffsets[i], brkr.getOffset(i), "split offset");
    }
}
Use of org.opengrok.indexer.analysis.StreamSource in the project OpenGrok by OpenGrok:
the class LineBreakerTest, method shouldSplitEmptyStringIntoOneLine.
/**
 * Verifies that an empty document is treated as a single (empty) line,
 * and that findLineIndex() answers -1 past the end of the document.
 */
@Test
public void shouldSplitEmptyStringIntoOneLine() throws IOException {
    brkr.reset(StreamSource.fromString(""));
    assertEquals(1, brkr.count(), "split count");
    assertEquals(0, brkr.getOffset(0), "split offset");
    // Offset 0 is on the (empty) first line; offset 1 is out of range.
    assertEquals(0, brkr.findLineIndex(0), "split find-index");
    assertEquals(-1, brkr.findLineIndex(1), "split find-index");
}
Use of org.opengrok.indexer.analysis.StreamSource in the project OpenGrok by OpenGrok:
the class GZIPAnalyzer, method analyze.
/**
 * Analyzes a gzipped document: detects an analyzer for the decompressed
 * content, delegates the real analysis to it, and sets this analyzer's
 * genre field {@code g} from the delegate's genre.
 * Note: only acts when the document's path ends in ".gz"
 * (case-insensitive); otherwise nothing is analyzed.
 * @param doc the document being populated
 * @param src stream source of the compressed data
 * @param xrefOut where to write the xref output
 * @throws IOException on error reading the stream
 * @throws InterruptedException if delegated analysis is interrupted
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException, InterruptedException {
AbstractAnalyzer fa;
// Wrap the source so readers see decompressed bytes.
StreamSource gzSrc = wrap(src);
String path = doc.get(QueryBuilder.PATH);
if (path != null && path.toLowerCase(Locale.ROOT).endsWith(".gz")) {
// Strip the ".gz" suffix so analyzer detection sees the inner file name.
String newname = path.substring(0, path.length() - 3);
// Open the stream once just to sniff the inner content type, then close it.
try (InputStream gzis = gzSrc.getStream()) {
fa = AnalyzerGuru.getAnalyzer(gzis, newname);
}
if (fa == null) {
// No analyzer matched the decompressed content; record it as plain data.
this.g = Genre.DATA;
LOGGER.log(Level.WARNING, "Did not analyze {0}, detected as data.", newname);
// TODO we could probably wrap tar analyzer here, need to do research on reader coming from gzis ...
} else {
// simple file gziped case captured here
if (fa.getGenre() == Genre.PLAIN || fa.getGenre() == Genre.XREFABLE) {
this.g = Genre.XREFABLE;
} else {
this.g = Genre.DATA;
}
// Delegate the actual analysis to the analyzer of the inner file.
fa.analyze(doc, gzSrc, xrefOut);
// Replace the delegate's type field (if any) with this analyzer's genre,
// but only re-add it when the result is xrefable.
if (doc.get(QueryBuilder.T) != null) {
doc.removeField(QueryBuilder.T);
if (g == Genre.XREFABLE) {
doc.add(new Field(QueryBuilder.T, g.typeName(), AnalyzerGuru.string_ft_stored_nanalyzed_norms));
}
}
}
}
}
Aggregations