Example usage of org.opensolaris.opengrok.analysis.JFlexTokenizer in the OpenGrok project.
From the class JavaSymbolTokenizerTest, method getTermsFor:
/**
 * Collects every term the test's analyzer produces for the {@code "refs"}
 * field from the specified input.
 * @param r the character stream to tokenize
 * @return the terms in stream order
 * @throws RuntimeException wrapping any {@link IOException} raised while
 * consuming the token stream
 */
private String[] getTermsFor(Reader r) {
    List<String> terms = new LinkedList<>();
    JFlexTokenizer ts = (JFlexTokenizer) this.analyzer.tokenStream("refs", r);
    // NOTE(review): Analyzer.tokenStream(field, reader) normally sets the
    // reader already — confirm this extra setReader is required by the
    // OpenGrok analyzer implementation.
    ts.setReader(r);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    try {
        // Lucene TokenStream contract: reset() before the first
        // incrementToken(), end() after the last one, then close().
        ts.reset();
        while (ts.incrementToken()) {
            terms.add(term.toString());
        }
        ts.end();
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    } finally {
        try {
            ts.close();
        } catch (IOException ignored) {
            // best-effort close of an already-consumed stream
        }
    }
    return terms.toArray(new String[0]);
}
Example usage of org.opensolaris.opengrok.analysis.JFlexTokenizer in the OpenGrok project.
From the class PerlSymbolTokenizerTest, method testOffsetAttribute:
/**
 * Helper method for {@link #testOffsetAttribute()} that runs the test on
 * one single implementation class with the specified input text and
 * expected tokens.
 * @param klass the symbol-matcher implementation under test
 * @param inputText the text to tokenize
 * @param expectedTokens the expected terms in stream order; offsets are
 * checked against the term's first occurrence in {@code inputText}, so
 * each expected term is assumed to occur there only once
 * @throws Exception if reflective construction or tokenization fails
 */
private void testOffsetAttribute(Class<? extends JFlexSymbolMatcher> klass, String inputText, String[] expectedTokens) throws Exception {
    JFlexSymbolMatcher matcher = klass.getConstructor(Reader.class).newInstance(new StringReader(inputText));
    JFlexTokenizer tokenizer = new JFlexTokenizer(matcher);
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);
    int count = 0;
    // Lucene TokenStream contract: reset() before the first
    // incrementToken(), end() after the last one, then close().
    tokenizer.reset();
    try {
        while (tokenizer.incrementToken()) {
            assertTrue("too many tokens", count < expectedTokens.length);
            String expected = expectedTokens[count];
            // 0-based offset to accord with String[]
            int start = inputText.indexOf(expected);
            assertEquals("term" + count, expected, term.toString());
            assertEquals("start" + count, start, offset.startOffset());
            assertEquals("end" + count, start + expected.length(), offset.endOffset());
            count++;
        }
        tokenizer.end();
    } finally {
        tokenizer.close();
    }
    assertEquals("wrong number of tokens", expectedTokens.length, count);
}
Example usage of org.opensolaris.opengrok.analysis.JFlexTokenizer in the OpenGrok project.
From the class PhpSymbolTokenizerTest, method getTermsFor:
/**
 * Collects every term the test's analyzer produces for the {@code "refs"}
 * field from the specified input.
 * @param r the character stream to tokenize
 * @return the terms in stream order
 * @throws RuntimeException wrapping any {@link IOException} raised while
 * consuming the token stream
 */
private String[] getTermsFor(Reader r) {
    List<String> terms = new LinkedList<>();
    JFlexTokenizer ts = (JFlexTokenizer) this.analyzer.tokenStream("refs", r);
    // NOTE(review): Analyzer.tokenStream(field, reader) normally sets the
    // reader already — confirm this extra setReader is required by the
    // OpenGrok analyzer implementation.
    ts.setReader(r);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    try {
        // Lucene TokenStream contract: reset() before the first
        // incrementToken(), end() after the last one, then close().
        ts.reset();
        while (ts.incrementToken()) {
            terms.add(term.toString());
        }
        ts.end();
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    } finally {
        try {
            ts.close();
        } catch (IOException ignored) {
            // best-effort close of an already-consumed stream
        }
    }
    return terms.toArray(new String[0]);
}
Example usage of org.opensolaris.opengrok.analysis.JFlexTokenizer in the OpenGrok project.
From the class HaskellSymbolTokenizerTest, method getTermsFor:
/**
 * Collects every term the test's analyzer produces for the {@code "refs"}
 * field from the specified input.
 * @param r the character stream to tokenize
 * @return the terms in stream order
 * @throws RuntimeException wrapping any {@link IOException} raised while
 * consuming the token stream
 */
private String[] getTermsFor(Reader r) {
    List<String> terms = new LinkedList<>();
    JFlexTokenizer ts = (JFlexTokenizer) this.analyzer.tokenStream("refs", r);
    // NOTE(review): Analyzer.tokenStream(field, reader) normally sets the
    // reader already — confirm this extra setReader is required by the
    // OpenGrok analyzer implementation.
    ts.setReader(r);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    try {
        // Lucene TokenStream contract: reset() before the first
        // incrementToken(), end() after the last one, then close().
        ts.reset();
        while (ts.incrementToken()) {
            terms.add(term.toString());
        }
        ts.end();
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    } finally {
        try {
            ts.close();
        } catch (IOException ignored) {
            // best-effort close of an already-consumed stream
        }
    }
    return terms.toArray(new String[0]);
}
Example usage of org.opensolaris.opengrok.analysis.JFlexTokenizer in the OpenGrok project.
From the class CustomAssertions, method assertSymbolStream:
/**
 * Asserts the specified tokenizer class produces an expected stream of
 * symbols from the specified input, and that each token's offsets point
 * at exactly the token's characters in the decoded input.
 * @param klass the test class
 * @param iss the input stream, decoded as UTF-8
 * @param expectedTokens the expected, ordered token list
 * @throws java.lang.Exception if an error occurs constructing a
 * {@code klass} instance or testing the stream
 */
public static void assertSymbolStream(Class<? extends JFlexSymbolMatcher> klass, InputStream iss, List<String> expectedTokens) throws Exception {
    // Buffer the stream so it can be both decoded for offset checks and
    // re-read by the tokenizer.
    byte[] inputCopy = copyStream(iss);
    String input = new String(inputCopy, StandardCharsets.UTF_8);
    JFlexTokenizer tokenizer = new JFlexTokenizer(klass.getConstructor(Reader.class).newInstance(
            new InputStreamReader(new ByteArrayInputStream(inputCopy), StandardCharsets.UTF_8)));
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    OffsetAttribute offs = tokenizer.addAttribute(OffsetAttribute.class);
    List<String> tokens = new ArrayList<>();
    // Lucene TokenStream contract: reset() before the first
    // incrementToken(), end() after the last one, then close().
    tokenizer.reset();
    try {
        int seen = 0;
        while (tokenizer.incrementToken()) {
            String termValue = term.toString();
            tokens.add(termValue);
            // The substring cut by the offsets must equal the term itself.
            String cutValue = input.substring(offs.startOffset(), offs.endOffset());
            assertEquals("cut term" + (1 + seen), cutValue, termValue);
            ++seen;
        }
        tokenizer.end();
    } finally {
        tokenizer.close();
    }
    int count = 0;
    for (String token : tokens) {
        // 1-based offset to accord with line #
        if (count >= expectedTokens.size()) {
            // Dump the full token list before failing, to ease diagnosis.
            printTokens(tokens);
            assertTrue("too many tokens at term" + (1 + count) + ": " + token, count < expectedTokens.size());
        }
        String expected = expectedTokens.get(count);
        if (!token.equals(expected)) {
            printTokens(tokens);
            assertEquals("term" + (1 + count), expected, token);
        }
        count++;
    }
    assertEquals("wrong number of tokens", expectedTokens.size(), count);
}
Aggregations