use of org.antlr.v4.runtime.atn.ATNState in project antlr4 by tunnelvisionlabs.
the class ATNOptimizer method optimizeSets.
private static void optimizeSets(Grammar g, ATN atn) {
if (g.isParser()) {
// parser codegen doesn't currently support SetTransition
return;
}
int removedStates = 0;
List<DecisionState> decisions = atn.decisionToState;
for (DecisionState decision : decisions) {
if (decision.ruleIndex >= 0) {
Rule rule = g.getRule(decision.ruleIndex);
if (Character.isLowerCase(rule.name.charAt(0))) {
// parser codegen doesn't currently support SetTransition
continue;
}
}
IntervalSet setTransitions = new IntervalSet();
for (int i = 0; i < decision.getNumberOfTransitions(); i++) {
Transition epsTransition = decision.transition(i);
if (!(epsTransition instanceof EpsilonTransition)) {
continue;
}
if (epsTransition.target.getNumberOfTransitions() != 1) {
continue;
}
Transition transition = epsTransition.target.transition(0);
if (!(transition.target instanceof BlockEndState)) {
continue;
}
if (transition instanceof NotSetTransition) {
// TODO: not yet implemented
continue;
}
if (transition instanceof AtomTransition || transition instanceof RangeTransition || transition instanceof SetTransition) {
setTransitions.add(i);
}
}
// due to min alt resolution policies, can only collapse sequential alts
for (int i = setTransitions.getIntervals().size() - 1; i >= 0; i--) {
Interval interval = setTransitions.getIntervals().get(i);
if (interval.length() <= 1) {
continue;
}
ATNState blockEndState = decision.transition(interval.a).target.transition(0).target;
IntervalSet matchSet = new IntervalSet();
for (int j = interval.a; j <= interval.b; j++) {
Transition matchTransition = decision.transition(j).target.transition(0);
if (matchTransition instanceof NotSetTransition) {
throw new UnsupportedOperationException("Not yet implemented.");
}
IntervalSet set = matchTransition.label();
List<Interval> intervals = set.getIntervals();
int n = intervals.size();
for (int k = 0; k < n; k++) {
Interval setInterval = intervals.get(k);
int a = setInterval.a;
int b = setInterval.b;
if (a != -1 && b != -1) {
for (int v = a; v <= b; v++) {
if (matchSet.contains(v)) {
// TODO: Token is missing (i.e. position in source will not be displayed).
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, null, CharSupport.getANTLRCharLiteralForChar(v), CharSupport.getIntervalSetEscapedString(matchSet));
break;
}
}
}
}
matchSet.addAll(set);
}
Transition newTransition;
if (matchSet.getIntervals().size() == 1) {
if (matchSet.size() == 1) {
newTransition = CodePointTransitions.createWithCodePoint(blockEndState, matchSet.getMinElement());
} else {
Interval matchInterval = matchSet.getIntervals().get(0);
newTransition = CodePointTransitions.createWithCodePointRange(blockEndState, matchInterval.a, matchInterval.b);
}
} else {
newTransition = new SetTransition(blockEndState, matchSet);
}
decision.transition(interval.a).target.setTransition(0, newTransition);
for (int j = interval.a + 1; j <= interval.b; j++) {
Transition removed = decision.removeTransition(interval.a + 1);
atn.removeState(removed.target);
removedStates++;
}
}
}
// System.out.println("ATN optimizer removed " + removedStates + " states by collapsing sets.");
}
use of org.antlr.v4.runtime.atn.ATNState in project antlr4 by tunnelvisionlabs.
the class LexerATNFactory method stringLiteral.
/**
* For a lexer, a string is a sequence of char to match. That is,
* "fog" is treated as 'f' 'o' 'g' not as a single transition in
* the DFA. Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states
* for n characters.
*/
@Override
public Handle stringLiteral(TerminalAST stringLiteralAST) {
String chars = stringLiteralAST.getText();
ATNState left = newState(stringLiteralAST);
ATNState right;
String s = CharSupport.getStringFromGrammarStringLiteral(chars);
if (s == null) {
// the lexer will already have given an error
return new Handle(left, left);
}
int n = s.length();
ATNState prev = left;
right = null;
for (int i = 0; i < n; ) {
right = newState(stringLiteralAST);
int codePoint = s.codePointAt(i);
prev.addTransition(CodePointTransitions.createWithCodePoint(right, codePoint));
prev = right;
i += Character.charCount(codePoint);
}
stringLiteralAST.atnState = left;
return new Handle(left, right);
}
use of org.antlr.v4.runtime.atn.ATNState in project antlr4 by tunnelvisionlabs.
the class LexerATNFactory method charSetLiteral.
/**
* [Aa\t \u1234a-z\]\p{Letter}\-] char sets
*/
@Override
public Handle charSetLiteral(GrammarAST charSetAST) {
ATNState left = newState(charSetAST);
ATNState right = newState(charSetAST);
IntervalSet set = getSetFromCharSetLiteral(charSetAST);
left.addTransition(new SetTransition(right, set));
charSetAST.atnState = left;
return new Handle(left, right);
}
use of org.antlr.v4.runtime.atn.ATNState in project antlr4 by tunnelvisionlabs.
the class LexerATNFactory method tokenRef.
@Override
public Handle tokenRef(TerminalAST node) {
// Ref to EOF in lexer yields char transition on -1
if (node.getText().equals("EOF")) {
ATNState left = newState(node);
ATNState right = newState(node);
left.addTransition(new AtomTransition(right, IntStream.EOF));
return new Handle(left, right);
}
return _ruleRef(node);
}
use of org.antlr.v4.runtime.atn.ATNState in project antlr4 by tunnelvisionlabs.
the class LexerATNFactory method range.
@Override
public Handle range(GrammarAST a, GrammarAST b) {
ATNState left = newState(a);
ATNState right = newState(b);
int t1 = CharSupport.getCharValueFromGrammarCharLiteral(a.getText());
int t2 = CharSupport.getCharValueFromGrammarCharLiteral(b.getText());
checkRange(a, b, t1, t2);
left.addTransition(CodePointTransitions.createWithCodePointRange(right, t1, t2));
a.atnState = left;
b.atnState = left;
return new Handle(left, right);
}
Aggregations