Use of org.opengrok.indexer.analysis.JFlexTokenizer in project OpenGrok by OpenGrok.
The class TroffAnalyzer, method analyze.
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException {
    // Explicitly use the appropriate analyzer's token stream to work around
    // #1376 ("symbols search works like full text search").
    JFlexTokenizer symbolTokenizer = symbolTokenizerFactory.get();
    symbolTokenizer.setReader(getReader(src.getStream()));
    OGKTextField full = new OGKTextField(QueryBuilder.FULL, symbolTokenizer);
    doc.add(full);
    if (xrefOut != null) {
        try (Reader in = getReader(src.getStream())) {
            WriteXrefArgs args = new WriteXrefArgs(in, xrefOut);
            args.setProject(project);
            Xrefer xref = writeXref(args);
            String path = doc.get(QueryBuilder.PATH);
            addNumLinesLOC(doc, new NumLinesLOC(path, xref.getLineNumber(), xref.getLOC()));
        }
    }
}
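For context, the analyze method above presumes a symbolTokenizerFactory that supplies a fresh JFlexTokenizer on each call, since a Lucene Tokenizer instance cannot back more than one field at a time. A minimal sketch of how such a factory could be declared, assuming a Supplier-style field; TroffFullTokenizer is a hypothetical matcher name used only for illustration, and its placeholder reader is replaced later via setReader:

import java.io.StringReader;
import java.util.function.Supplier;

// Sketch under assumptions: TroffFullTokenizer stands in for the analyzer's
// actual JFlexSymbolMatcher subclass; the empty StringReader is a placeholder
// until analyze(...) calls setReader with the real source stream.
private final Supplier<JFlexTokenizer> symbolTokenizerFactory =
        () -> new JFlexTokenizer(new TroffFullTokenizer(new StringReader("")));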
Use of org.opengrok.indexer.analysis.JFlexTokenizer in project OpenGrok by OpenGrok.
The class CustomAssertions, method assertSymbolStream.
/**
 * Asserts that the specified tokenizer class produces the expected stream
 * of symbols from the specified input.
 * @param klass the symbol-matcher class under test
 * @param iss the input stream
 * @param expectedTokens the expected, ordered token list
 * @throws java.lang.Exception if an error occurs constructing a
 * {@code klass} instance or testing the stream
 */
public static void assertSymbolStream(Class<? extends JFlexSymbolMatcher> klass,
        InputStream iss, List<String> expectedTokens) throws Exception {

    byte[] inputCopy = copyStream(iss);
    String input = new String(inputCopy, StandardCharsets.UTF_8);
    JFlexTokenizer tokenizer = new JFlexTokenizer(klass.getConstructor(Reader.class)
            .newInstance(new InputStreamReader(new ByteArrayInputStream(inputCopy),
            StandardCharsets.UTF_8)));

    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    OffsetAttribute offs = tokenizer.addAttribute(OffsetAttribute.class);

    int count = 0;
    List<String> tokens = new ArrayList<>();
    while (tokenizer.incrementToken()) {
        String termValue = term.toString();
        tokens.add(termValue);
        // Check that the reported offsets cut the same text from the input.
        String cutValue = input.substring(offs.startOffset(), offs.endOffset());
        assertEquals(cutValue, termValue, "cut term" + (1 + count));
        ++count;
    }

    count = 0;
    for (String token : tokens) {
        // Messages use 1-based term numbers to accord with line numbering.
        if (count >= expectedTokens.size()) {
            printTokens(tokens);
            assertTrue(count < expectedTokens.size(),
                    "too many tokens at term" + (1 + count) + ": " + token);
        }
        String expected = expectedTokens.get(count);
        if (!token.equals(expected)) {
            printTokens(tokens);
            assertEquals(expected, token, "term" + (1 + count));
        }
        count++;
    }

    if (expectedTokens.size() != count) {
        printTokens(tokens);
        assertEquals(expectedTokens.size(), count, "wrong number of tokens");
    }
}
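A usage sketch for assertSymbolStream; the resource path and expected token list are illustrative placeholders, not actual OpenGrok fixtures:

import java.io.InputStream;
import java.util.List;
import org.junit.jupiter.api.Test;
import org.opengrok.indexer.analysis.haskell.HaskellSymbolTokenizer;

class SampleSymbolStreamTest {
    @Test
    void matchesExpectedSymbols() throws Exception {
        // Hypothetical fixture: any JFlexSymbolMatcher subclass works here,
        // e.g. the Haskell matcher exercised by the tests below.
        InputStream iss = getClass().getResourceAsStream("/analysis/haskell/sample.hs");
        List<String> expected = List.of("main", "putStrLn");
        CustomAssertions.assertSymbolStream(HaskellSymbolTokenizer.class, iss, expected);
    }
}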
Use of org.opengrok.indexer.analysis.JFlexTokenizer in project OpenGrok by OpenGrok.
The class HaskellSymbolTokenizerTest, method getTermsFor.
private String[] getTermsFor(Reader r) {
    List<String> l = new LinkedList<>();
    JFlexTokenizer ts = (JFlexTokenizer) this.analyzer.tokenStream("refs", r);
    ts.setReader(r);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    try {
        ts.reset();
        while (ts.incrementToken()) {
            l.add(term.toString());
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
    return l.toArray(new String[0]);
}
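For context, a call might look like the following; the sample Haskell source and the symbols shown are illustrative, since the exact output depends on the Haskell symbol matcher's rules:

// Illustrative usage (assumes java.io.StringReader is imported).
String[] terms = getTermsFor(new StringReader("main = putStrLn \"hello\""));
// Plausibly yields symbol terms such as "main" and "putStrLn".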
Use of org.opengrok.indexer.analysis.JFlexTokenizer in project OpenGrok by OpenGrok.
The class JavaSymbolTokenizerTest, method getTermsFor.
private String[] getTermsFor(Reader r) {
    List<String> l = new LinkedList<>();
    JFlexTokenizer ts = (JFlexTokenizer) this.analyzer.tokenStream("refs", r);
    ts.setReader(r);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    try {
        ts.reset();
        while (ts.incrementToken()) {
            l.add(term.toString());
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
    return l.toArray(new String[0]);
}
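The same helper pattern applied to Java source, again with illustrative input and output:

// Illustrative usage (assumes java.io.StringReader is imported).
String[] terms = getTermsFor(new StringReader("class Foo { int bar; }"));
// Plausibly yields symbol terms such as "Foo" and "bar".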