Use of org.antlr.v4.runtime.misc.IntervalSet in project antlr4 by antlr.
Class Parser, method isExpectedToken.
/**
 * Tests whether {@code symbol} is a legal next token for the parser's
 * current ATN state, consulting the rule invocation stack when needed.
 *
 * <p>The result is the same as evaluating the expression below, but it is
 * computed step by step so that the complete context-sensitive follow set
 * never has to be built explicitly.</p>
 *
 * <pre>
 * return getExpectedTokens().contains(symbol);
 * </pre>
 *
 * @param symbol the symbol type to check
 * @return {@code true} if {@code symbol} can follow the current state in
 * the ATN, otherwise {@code false}.
 */
public boolean isExpectedToken(int symbol) {
    final ATN network = getInterpreter().atn;
    ParserRuleContext frame = _ctx;
    IntervalSet candidates = network.nextTokens(network.states.get(getState()));
    if (candidates.contains(symbol)) {
        return true;
    }
    // EPSILON in the set is what sends us up to the invoking rules below;
    // without it the local answer is final.
    if (!candidates.contains(Token.EPSILON)) {
        return false;
    }
    // Climb the invocation stack one frame at a time, checking the tokens
    // that follow each rule invocation, for as long as EPSILON is present.
    for (; frame != null && frame.invokingState >= 0 && candidates.contains(Token.EPSILON);
            frame = (ParserRuleContext) frame.parent) {
        ATNState caller = network.states.get(frame.invokingState);
        RuleTransition invocation = (RuleTransition) caller.transition(0);
        candidates = network.nextTokens(invocation.followState);
        if (candidates.contains(symbol)) {
            return true;
        }
    }
    // EPSILON still present once the climb stops: only EOF is accepted.
    return candidates.contains(Token.EPSILON) && symbol == Token.EOF;
}
Use of org.antlr.v4.runtime.misc.IntervalSet in project antlr4 by antlr.
Class ATN, method nextTokens.
/**
 * Computes the set of valid tokens that can occur starting in state
 * {@code s}, delegating the work to a fresh {@link LL1Analyzer}.
 *
 * <p>When {@code ctx} is {@code null}, the result leaves out whatever can
 * follow the rule surrounding {@code s}; that is, it is limited to tokens
 * reachable while staying inside {@code s}'s rule.</p>
 *
 * @param s the ATN state to start from
 * @param ctx the rule context to look through, or {@code null}
 * @return the set produced by {@code LL1Analyzer.LOOK(s, ctx)}
 */
public IntervalSet nextTokens(ATNState s, RuleContext ctx) {
    return new LL1Analyzer(this).LOOK(s, ctx);
}
Use of org.antlr.v4.runtime.misc.IntervalSet in project antlr4 by antlr.
Class ATN, method getExpectedTokens.
/**
 * Computes the set of input symbols which could follow ATN state number
 * {@code stateNumber} given the full {@code context}.
 *
 * <p>The complete parser context is taken into account, but semantic
 * predicates are not evaluated (every predicate met during the calculation
 * is assumed true). If there is a path in the ATN from the starting state
 * to the {@link RuleStopState} of the outermost context that matches no
 * symbols, {@link Token#EOF} is added to the returned set.</p>
 *
 * <p>If {@code context} is {@code null}, it is treated as {@link ParserRuleContext#EMPTY}.</p>
 *
 * <p>This does NOT produce the set of all tokens that could appear at a
 * given token position in the input phrase. It does not answer:
 * "Given a specific partial input phrase, return the set of all tokens
 * that can follow the last token in the input phrase."
 * With only the input to go on, the parser could be sitting in the middle
 * of a lookahead decision, and collecting all *possible* tokens for a
 * partial input stream is a separate computation.
 * See https://github.com/antlr/antlr4/issues/1428</p>
 *
 * <p>Here the caller supplies an ATN state and a call stack, and the result
 * is what token(s) can come next outside of a lookahead decision, which is
 * what error reporting and recovery need upon a parse error.</p>
 *
 * @param stateNumber the ATN state number
 * @param context the full parse context
 * @return The set of potentially valid input symbols which could follow the
 * specified state in the specified context.
 * @throws IllegalArgumentException if the ATN does not contain a state with
 * number {@code stateNumber}
 */
public IntervalSet getExpectedTokens(int stateNumber, RuleContext context) {
    boolean inRange = stateNumber >= 0 && stateNumber < states.size();
    if (!inRange) {
        throw new IllegalArgumentException("Invalid state number.");
    }

    IntervalSet following = nextTokens(states.get(stateNumber));
    // No EPSILON: the local set is already the full answer, return it as is.
    if (!following.contains(Token.EPSILON)) {
        return following;
    }

    // Accumulate into a separate set so EPSILON can be stripped from the result.
    IntervalSet expected = new IntervalSet();
    expected.addAll(following);
    expected.remove(Token.EPSILON);

    // Walk outward through the invoking rules while EPSILON keeps showing up.
    for (RuleContext frame = context;
            frame != null && frame.invokingState >= 0 && following.contains(Token.EPSILON);
            frame = frame.parent) {
        ATNState caller = states.get(frame.invokingState);
        RuleTransition invocation = (RuleTransition) caller.transition(0);
        following = nextTokens(invocation.followState);
        expected.addAll(following);
        expected.remove(Token.EPSILON);
    }

    // EPSILON in the last set examined means EOF is acceptable as well.
    if (following.contains(Token.EPSILON)) {
        expected.add(Token.EOF);
    }
    return expected;
}
Use of org.antlr.v4.runtime.misc.IntervalSet in project antlr4 by antlr.
Class Grammar, method getStateToGrammarRegionMap.
/**
 * Builds a map from ATN state number to the token-index interval of the
 * grammar AST node that carries that state.
 *
 * <p>Only nodes of the requested types that have a non-null
 * {@code atnState} contribute an entry. For a RULE node, or a BLOCK or
 * CLOSURE node with a RULE ancestor, whose rule is a
 * {@link LeftRecursiveRule}, the interval of the rule's original AST is
 * recorded instead of the node's own interval.</p>
 *
 * @param ast the grammar root to scan; {@code null} yields an empty map
 * @param grammarTokenTypes the AST node types to collect
 * @return a new map keyed by ATN state number
 */
public static Map<Integer, Interval> getStateToGrammarRegionMap(GrammarRootAST ast, IntervalSet grammarTokenTypes) {
    Map<Integer, Interval> regionsByState = new HashMap<Integer, Interval>();
    if (ast == null) {
        return regionsByState;
    }

    for (GrammarAST node : ast.getNodesWithType(grammarTokenTypes)) {
        if (node.atnState == null) {
            continue;
        }

        Interval region = Interval.of(node.getTokenStartIndex(), node.getTokenStopIndex());

        // RULEs, BLOCKs of transformed recursive rules point to original token interval
        int nodeType = node.getType();
        org.antlr.runtime.tree.Tree owningRule = null;
        if (nodeType == ANTLRParser.RULE) {
            owningRule = node;
        }
        else if (nodeType == ANTLRParser.BLOCK || nodeType == ANTLRParser.CLOSURE) {
            owningRule = node.getAncestor(ANTLRParser.RULE);
        }

        if (owningRule instanceof RuleAST) {
            Rule rule = ast.g.getRule(((RuleAST) owningRule).getRuleName());
            if (rule instanceof LeftRecursiveRule) {
                RuleAST preTransform = ((LeftRecursiveRule) rule).getOriginalAST();
                region = Interval.of(preTransform.getTokenStartIndex(), preTransform.getTokenStopIndex());
            }
        }

        regionsByState.put(node.atnState.stateNumber, region);
    }
    return regionsByState;
}
Use of org.antlr.v4.runtime.misc.IntervalSet in project antlr4 by antlr.
Class UnicodeDataTemplateController, method addTR35ExtendedPictographicPropertyCodesToCodePointRanges.
/**
 * Registers three code point sets in {@code propertyCodePointRanges}:
 * <ul>
 * <li>{@code "Extended_Pictographic"}: the hard-coded ranges listed below;</li>
 * <li>{@code "EmojiRK"}: the code points matched by the first
 * {@link UnicodeSet} pattern below;</li>
 * <li>{@code "EmojiNRK"}: the code points matched by
 * {@code [\p{Emoji=Yes}]} with the {@code "EmojiRK"} code points removed.</li>
 * </ul>
 *
 * @param propertyCodePointRanges the map that receives the three entries
 */
private static void addTR35ExtendedPictographicPropertyCodesToCodePointRanges(Map<String, IntervalSet> propertyCodePointRanges) {
    IntervalSet pictographic = new IntervalSet();
    // Generated using scripts/parse-extended-pictographic/parse.py
    pictographic.add(0x1F774, 0x1F77F);
    pictographic.add(0x2700, 0x2701);
    pictographic.add(0x2703, 0x2704);
    pictographic.add(0x270E);
    pictographic.add(0x2710, 0x2711);
    pictographic.add(0x2765, 0x2767);
    pictographic.add(0x1F030, 0x1F093);
    pictographic.add(0x1F094, 0x1F09F);
    pictographic.add(0x1F10D, 0x1F10F);
    pictographic.add(0x1F12F);
    pictographic.add(0x1F16C, 0x1F16F);
    pictographic.add(0x1F1AD, 0x1F1E5);
    pictographic.add(0x1F260, 0x1F265);
    pictographic.add(0x1F203, 0x1F20F);
    pictographic.add(0x1F23C, 0x1F23F);
    pictographic.add(0x1F249, 0x1F24F);
    pictographic.add(0x1F252, 0x1F25F);
    pictographic.add(0x1F266, 0x1F2FF);
    pictographic.add(0x1F7D5, 0x1F7FF);
    pictographic.add(0x1F000, 0x1F003);
    pictographic.add(0x1F005, 0x1F02B);
    pictographic.add(0x1F02C, 0x1F02F);
    pictographic.add(0x1F322, 0x1F323);
    pictographic.add(0x1F394, 0x1F395);
    pictographic.add(0x1F398);
    pictographic.add(0x1F39C, 0x1F39D);
    pictographic.add(0x1F3F1, 0x1F3F2);
    pictographic.add(0x1F3F6);
    pictographic.add(0x1F4FE);
    pictographic.add(0x1F53E, 0x1F548);
    pictographic.add(0x1F54F);
    pictographic.add(0x1F568, 0x1F56E);
    pictographic.add(0x1F571, 0x1F572);
    pictographic.add(0x1F57B, 0x1F586);
    pictographic.add(0x1F588, 0x1F589);
    pictographic.add(0x1F58E, 0x1F58F);
    pictographic.add(0x1F591, 0x1F594);
    pictographic.add(0x1F597, 0x1F5A3);
    pictographic.add(0x1F5A6, 0x1F5A7);
    pictographic.add(0x1F5A9, 0x1F5B0);
    pictographic.add(0x1F5B3, 0x1F5BB);
    pictographic.add(0x1F5BD, 0x1F5C1);
    pictographic.add(0x1F5C5, 0x1F5D0);
    pictographic.add(0x1F5D4, 0x1F5DB);
    pictographic.add(0x1F5DF, 0x1F5E0);
    pictographic.add(0x1F5E2);
    pictographic.add(0x1F5E4, 0x1F5E7);
    pictographic.add(0x1F5E9, 0x1F5EE);
    pictographic.add(0x1F5F0, 0x1F5F2);
    pictographic.add(0x1F5F4, 0x1F5F9);
    pictographic.add(0x2605);
    pictographic.add(0x2607, 0x260D);
    pictographic.add(0x260F, 0x2610);
    pictographic.add(0x2612);
    pictographic.add(0x2616, 0x2617);
    pictographic.add(0x2619, 0x261C);
    pictographic.add(0x261E, 0x261F);
    pictographic.add(0x2621);
    pictographic.add(0x2624, 0x2625);
    pictographic.add(0x2627, 0x2629);
    pictographic.add(0x262B, 0x262D);
    pictographic.add(0x2630, 0x2637);
    pictographic.add(0x263B, 0x2647);
    pictographic.add(0x2654, 0x265F);
    pictographic.add(0x2661, 0x2662);
    pictographic.add(0x2664);
    pictographic.add(0x2667);
    pictographic.add(0x2669, 0x267A);
    pictographic.add(0x267C, 0x267E);
    pictographic.add(0x2680, 0x2691);
    pictographic.add(0x2695);
    pictographic.add(0x2698);
    pictographic.add(0x269A);
    pictographic.add(0x269D, 0x269F);
    pictographic.add(0x26A2, 0x26A9);
    pictographic.add(0x26AC, 0x26AF);
    pictographic.add(0x26B2, 0x26BC);
    pictographic.add(0x26BF, 0x26C3);
    pictographic.add(0x26C6, 0x26C7);
    pictographic.add(0x26C9, 0x26CD);
    pictographic.add(0x26D0);
    pictographic.add(0x26D2);
    pictographic.add(0x26D5, 0x26E8);
    pictographic.add(0x26EB, 0x26EF);
    pictographic.add(0x26F6);
    pictographic.add(0x26FB, 0x26FC);
    pictographic.add(0x26FE, 0x26FF);
    pictographic.add(0x2388);
    pictographic.add(0x1FA00, 0x1FFFD);
    pictographic.add(0x1F0A0, 0x1F0AE);
    pictographic.add(0x1F0B1, 0x1F0BF);
    pictographic.add(0x1F0C1, 0x1F0CF);
    pictographic.add(0x1F0D1, 0x1F0F5);
    pictographic.add(0x1F0AF, 0x1F0B0);
    pictographic.add(0x1F0C0);
    pictographic.add(0x1F0D0);
    pictographic.add(0x1F0F6, 0x1F0FF);
    pictographic.add(0x1F80C, 0x1F80F);
    pictographic.add(0x1F848, 0x1F84F);
    pictographic.add(0x1F85A, 0x1F85F);
    pictographic.add(0x1F888, 0x1F88F);
    pictographic.add(0x1F8AE, 0x1F8FF);
    pictographic.add(0x1F900, 0x1F90B);
    pictographic.add(0x1F91F);
    pictographic.add(0x1F928, 0x1F92F);
    pictographic.add(0x1F931, 0x1F932);
    pictographic.add(0x1F94C);
    pictographic.add(0x1F95F, 0x1F96B);
    pictographic.add(0x1F992, 0x1F997);
    pictographic.add(0x1F9D0, 0x1F9E6);
    pictographic.add(0x1F90C, 0x1F90F);
    pictographic.add(0x1F93F);
    pictographic.add(0x1F94D, 0x1F94F);
    pictographic.add(0x1F96C, 0x1F97F);
    pictographic.add(0x1F998, 0x1F9BF);
    pictographic.add(0x1F9C1, 0x1F9CF);
    pictographic.add(0x1F9E7, 0x1F9FF);
    pictographic.add(0x1F6C6, 0x1F6CA);
    pictographic.add(0x1F6D3, 0x1F6D4);
    pictographic.add(0x1F6E6, 0x1F6E8);
    pictographic.add(0x1F6EA);
    pictographic.add(0x1F6F1, 0x1F6F2);
    pictographic.add(0x1F6F7, 0x1F6F8);
    pictographic.add(0x1F6D5, 0x1F6DF);
    pictographic.add(0x1F6ED, 0x1F6EF);
    pictographic.add(0x1F6F9, 0x1F6FF);
    propertyCodePointRanges.put("Extended_Pictographic", pictographic);

    // "EmojiRK": code points selected by the UnicodeSet pattern below.
    UnicodeSet rkChars = new UnicodeSet("[\\p{GCB=Regional_Indicator}\\*#0-9\\u00a9\\u00ae\\u2122\\u3030\\u303d]");
    IntervalSet rkRanges = new IntervalSet();
    addUnicodeSetToIntervalSet(rkChars, rkRanges);
    propertyCodePointRanges.put("EmojiRK", rkRanges);

    // "EmojiNRK": every Emoji=Yes code point that is not already in the set above.
    UnicodeSet nrkChars = new UnicodeSet("[\\p{Emoji=Yes}]");
    nrkChars.removeAll(rkChars);
    IntervalSet nrkRanges = new IntervalSet();
    addUnicodeSetToIntervalSet(nrkChars, nrkRanges);
    propertyCodePointRanges.put("EmojiNRK", nrkRanges);
}
Aggregations