use of org.antlr.v4.runtime.Token in project antlr4 by antlr.
the class ParseTreePatternMatcher method tokenize.
/**
 * Turn a tree pattern string into a flat token list.
 *
 * <p>The pattern is first cut into chunks by {@code split(pattern)}. Tag
 * chunks become a single {@link TokenTagToken} (tag starts with an upper-case
 * letter) or {@link RuleTagToken} (tag starts with a lower-case letter); every
 * other chunk is treated as a text chunk and fed through {@code lexer}.</p>
 *
 * @param pattern the tree pattern text to tokenize
 * @return the tokens for all chunks, in chunk order (lexer EOF tokens are not included)
 * @throws IllegalArgumentException if a tag names an unknown token or rule,
 *         or if the tag starts with neither an upper- nor a lower-case letter
 */
public List<? extends Token> tokenize(String pattern) {
    // sea (raw input) and islands (<ID>, <expr>)
    List<Chunk> pieces = split(pattern);

    List<Token> result = new ArrayList<Token>();
    for (Chunk piece : pieces) {
        if (!(piece instanceof TagChunk)) {
            // Raw text: point the lexer at it and collect tokens until EOF.
            TextChunk text = (TextChunk) piece;
            lexer.setInputStream(new ANTLRInputStream(text.getText()));
            for (Token tok = lexer.nextToken(); tok.getType() != Token.EOF; tok = lexer.nextToken()) {
                result.add(tok);
            }
            continue;
        }

        TagChunk tag = (TagChunk) piece;
        String tagName = tag.getTag();
        char firstChar = tagName.charAt(0);

        if (Character.isUpperCase(firstChar)) {
            // Upper-case tag: refers to a token type known to the parser.
            int tokenType = parser.getTokenType(tagName);
            if (tokenType == Token.INVALID_TYPE) {
                throw new IllegalArgumentException("Unknown token " + tagName + " in pattern: " + pattern);
            }
            result.add(new TokenTagToken(tagName, tokenType, tag.getLabel()));
        } else if (Character.isLowerCase(firstChar)) {
            // Lower-case tag: refers to a rule; use the token type that the
            // bypass-alternative ATN associates with that rule index.
            int ruleIndex = parser.getRuleIndex(tagName);
            if (ruleIndex == -1) {
                throw new IllegalArgumentException("Unknown rule " + tagName + " in pattern: " + pattern);
            }
            int ruleTokenType = parser.getATNWithBypassAlts().ruleToTokenType[ruleIndex];
            result.add(new RuleTagToken(tagName, ruleTokenType, tag.getLabel()));
        } else {
            throw new IllegalArgumentException("invalid tag: " + tagName + " in pattern: " + pattern);
        }
    }
    return result;
}
use of org.antlr.v4.runtime.Token in project antlr4 by antlr.
the class ParseTreePatternMatcher method compile.
/**
 * For repeated use of a tree pattern, compile it to a
 * {@link ParseTreePattern} using this method.
 *
 * <p>The pattern is tokenized, then parsed by a {@link ParserInterpreter}
 * (built over the parser's bypass-alternative ATN) starting at
 * {@code patternRuleIndex}, with a {@link BailErrorStrategy} installed.</p>
 *
 * @param pattern the tree pattern text
 * @param patternRuleIndex index of the rule at which parsing of the pattern starts
 * @return the compiled pattern wrapping the resulting parse tree
 * @throws RecognitionException if the parse is cancelled (the cancellation's
 *         cause is rethrown) or a recognition error escapes the interpreter
 * @throws CannotInvokeStartRule if any other exception occurs while parsing
 * @throws StartRuleDoesNotConsumeFullPattern if tokens remain after the parse
 */
public ParseTreePattern compile(String pattern, int patternRuleIndex) {
    CommonTokenStream tokens = new CommonTokenStream(new ListTokenSource(tokenize(pattern)));

    ParserInterpreter interpreter = new ParserInterpreter(
            parser.getGrammarFileName(),
            parser.getVocabulary(),
            Arrays.asList(parser.getRuleNames()),
            parser.getATNWithBypassAlts(),
            tokens);

    ParseTree patternTree;
    try {
        interpreter.setErrorHandler(new BailErrorStrategy());
        patternTree = interpreter.parse(patternRuleIndex);
    } catch (ParseCancellationException cancelled) {
        // Surface the underlying recognition problem rather than the wrapper.
        throw (RecognitionException) cancelled.getCause();
    } catch (RecognitionException recognitionError) {
        throw recognitionError;
    } catch (Exception other) {
        throw new CannotInvokeStartRule(other);
    }

    // The start rule must have consumed the entire pattern.
    boolean fullyConsumed = tokens.LA(1) == Token.EOF;
    if (!fullyConsumed) {
        throw new StartRuleDoesNotConsumeFullPattern();
    }

    return new ParseTreePattern(this, pattern, patternRuleIndex, patternTree);
}
use of org.antlr.v4.runtime.Token in project antlr4 by antlr.
the class XPath method split.
// TODO: check for invalid token/rule names, bad syntax
/**
 * Split an XPath-style path string into its sequence of path elements.
 *
 * <p>The path is lexed with an {@link XPathLexer} whose {@code recover} is
 * overridden to rethrow, so a lexing failure stops tokenization instead of
 * being skipped over. The resulting tokens are then walked left to right:
 * a {@code ROOT} or {@code ANYWHERE} separator, optionally followed by
 * {@code BANG}, is combined with the token after it into one element; a bare
 * {@code TOKEN_REF}, {@code RULE_REF} or {@code WILDCARD} becomes an element
 * on its own; {@code EOF} ends the walk.</p>
 *
 * @param path the path expression to split
 * @return the path elements in the order they appear in {@code path}
 * @throws IllegalArgumentException if the path cannot be read, contains
 *         characters the lexer cannot match, or contains a token that is not
 *         valid at the position where it appears
 */
public XPathElement[] split(String path) {
    ANTLRInputStream in;
    try {
        in = new ANTLRInputStream(new StringReader(path));
    } catch (IOException ioe) {
        throw new IllegalArgumentException("Could not read path: " + path, ioe);
    }
    XPathLexer lexer = new XPathLexer(in) {
        @Override
        public void recover(LexerNoViableAltException e) {
            // Do not attempt recovery; let the caller of fill() see the failure.
            throw e;
        }
    };
    lexer.removeErrorListeners();
    lexer.addErrorListener(new XPathLexerErrorListener());
    CommonTokenStream tokenStream = new CommonTokenStream(lexer);
    try {
        tokenStream.fill();
    } catch (LexerNoViableAltException e) {
        int pos = lexer.getCharPositionInLine();
        String msg = "Invalid tokens or characters at index " + pos + " in path '" + path + "'";
        throw new IllegalArgumentException(msg, e);
    }
    List<Token> tokens = tokenStream.getTokens();
    List<XPathElement> elements = new ArrayList<XPathElement>();
    int n = tokens.size();
    int i = 0;
    loop: while (i < n) {
        Token el = tokens.get(i);
        Token next = null;
        switch (el.getType()) {
            case XPathLexer.ROOT:
            case XPathLexer.ANYWHERE:
                boolean anywhere = el.getType() == XPathLexer.ANYWHERE;
                // NOTE(review): the lookups below assume the filled stream ends
                // with an EOF token, so a separator is never the last token.
                i++;
                next = tokens.get(i);
                boolean invert = next.getType() == XPathLexer.BANG;
                if (invert) {
                    // Skip the '!' and use the token after it as the element.
                    i++;
                    next = tokens.get(i);
                }
                XPathElement pathElement = getXPathElement(next, anywhere);
                pathElement.invert = invert;
                elements.add(pathElement);
                i++;
                break;
            case XPathLexer.TOKEN_REF:
            case XPathLexer.RULE_REF:
            case XPathLexer.WILDCARD:
                elements.add(getXPathElement(el, false));
                i++;
                break;
            case Token.EOF:
                break loop;
            default:
                // Fixed typo in the message ("Unknowth" -> "Unknown").
                throw new IllegalArgumentException("Unknown path element " + el);
        }
    }
    return elements.toArray(new XPathElement[0]);
}
use of org.antlr.v4.runtime.Token in project antlr4 by antlr.
the class XPath method evaluate.
/**
 * Return a list of all nodes starting at {@code t} as root that satisfy the
 * path. The root {@code /} is relative to the node passed to
 * {@link #evaluate}.
 *
 * <p>Evaluation proceeds one path element at a time: the nodes matched by the
 * previous element form the work set for the next one. The initial work set
 * is a single dummy context whose only child is {@code t}.</p>
 *
 * @param t the tree node the path is evaluated against
 * @return the work set left after the last path element has been applied
 */
public Collection<ParseTree> evaluate(final ParseTree t) {
    // Wrap t in a throwaway parent; t's own parent pointer is left untouched.
    ParserRuleContext dummyRoot = new ParserRuleContext();
    dummyRoot.children = Collections.singletonList(t);

    Collection<ParseTree> current = Collections.<ParseTree>singleton(dummyRoot);
    for (int step = 0; step < elements.length; step++) {
        Collection<ParseTree> matched = new LinkedHashSet<ParseTree>();
        for (ParseTree candidate : current) {
            // Childless nodes (e.g. a token node reached via //func/*/stat)
            // have nothing beneath them for the next element to match.
            if (candidate.getChildCount() <= 0) {
                continue;
            }
            Collection<? extends ParseTree> hits = elements[step].evaluate(candidate);
            matched.addAll(hits);
        }
        current = matched;
    }
    return current;
}
use of org.antlr.v4.runtime.Token in project antlr4 by antlr.
the class XPathLexer method nextToken.
/**
 * Hand-written tokenizer for path expressions: inspects the next input
 * character and produces exactly one token per call.
 *
 * <p>Recognized inputs are {@code //} (ANYWHERE), {@code /} (ROOT),
 * {@code *} (WILDCARD), {@code !} (BANG), a quote-introduced string (STRING),
 * and names, which are TOKEN_REF when their first character is upper-case and
 * RULE_REF otherwise. At end of input an EOF token is returned immediately,
 * without start index, char position or line being set on it.</p>
 *
 * @return the next token from the input
 * @throws LexerNoViableAltException if the next character starts none of the
 *         recognized inputs
 */
@Override
public Token nextToken() {
    _tokenStartCharIndex = _input.index();

    final int lookahead = _input.LA(1);
    CommonToken token;
    if (lookahead == '/') {
        consume();
        // A second slash turns the separator into "anywhere".
        if (_input.LA(1) == '/') {
            consume();
            token = new CommonToken(ANYWHERE, "//");
        } else {
            token = new CommonToken(ROOT, "/");
        }
    } else if (lookahead == '*') {
        consume();
        token = new CommonToken(WILDCARD, "*");
    } else if (lookahead == '!') {
        consume();
        token = new CommonToken(BANG, "!");
    } else if (lookahead == '\'') {
        token = new CommonToken(STRING, matchString());
    } else if (lookahead == CharStream.EOF) {
        return new CommonToken(EOF, "<EOF>");
    } else if (isNameStartChar(lookahead)) {
        String name = matchID();
        int nameType = Character.isUpperCase(name.charAt(0)) ? TOKEN_REF : RULE_REF;
        token = new CommonToken(nameType, name);
    } else {
        throw new LexerNoViableAltException(this, _input, _tokenStartCharIndex, null);
    }

    // The token's start char index is also used as its position in the line.
    token.setStartIndex(_tokenStartCharIndex);
    token.setCharPositionInLine(_tokenStartCharIndex);
    token.setLine(line);
    return token;
}
Aggregations