Search in sources :

Example 6 with Token

use of org.antlr.v4.runtime.Token in project antlr4 by antlr.

the class ATNSerializer method serialize.

/** Serialize state descriptors, edge descriptors, and decision→state map
	 *  into list of ints:
	 *
	 * 		grammar-type, (ANTLRParser.LEXER, ...)
	 *  	max token type,
	 *  	num states,
	 *  	state-0-type ruleIndex, state-1-type ruleIndex, ... state-i-type ruleIndex optional-arg ...
	 *  	num rules,
	 *  	rule-1-start-state rule-1-args, rule-2-start-state  rule-2-args, ...
	 *  	(args are token type,actionIndex in lexer else 0,0)
	 *      num modes,
	 *      mode-0-start-state, mode-1-start-state, ... (parser has 0 modes)
	 *      num unicode-bmp-sets
	 *      bmp-set-0-interval-count intervals, bmp-set-1-interval-count intervals, ...
	 *      num unicode-smp-sets
	 *      smp-set-0-interval-count intervals, smp-set-1-interval-count intervals, ...
	 *	num total edges,
	 *      src, trg, edge-type, edge arg1, optional edge arg2 (present always), ...
	 *      num decisions,
	 *      decision-0-start-state, decision-1-start-state, ...
	 *
	 *  Convenient to pack into unsigned shorts to make as Java string.
	 */
public IntegerList serialize() {
    IntegerList data = new IntegerList();
    data.add(ATNDeserializer.SERIALIZED_VERSION);
    serializeUUID(data, ATNDeserializer.SERIALIZED_UUID);
    // convert grammar type to ATN const to avoid dependence on ANTLRParser
    data.add(atn.grammarType.ordinal());
    data.add(atn.maxTokenType);
    int nedges = 0;
    // Note that we use a LinkedHashMap as a set to
    // maintain insertion order while deduplicating
    // entries with the same key.
    Map<IntervalSet, Boolean> sets = new LinkedHashMap<>();
    // dump states, count edges and collect sets while doing so
    IntegerList nonGreedyStates = new IntegerList();
    IntegerList precedenceStates = new IntegerList();
    data.add(atn.states.size());
    for (ATNState s : atn.states) {
        if (s == null) {
            // might be optimized away
            data.add(ATNState.INVALID_TYPE);
            continue;
        }
        int stateType = s.getStateType();
        if (s instanceof DecisionState && ((DecisionState) s).nonGreedy) {
            nonGreedyStates.add(s.stateNumber);
        }
        if (s instanceof RuleStartState && ((RuleStartState) s).isLeftRecursiveRule) {
            precedenceStates.add(s.stateNumber);
        }
        data.add(stateType);
        if (s.ruleIndex == -1) {
            data.add(Character.MAX_VALUE);
        } else {
            data.add(s.ruleIndex);
        }
        if (s.getStateType() == ATNState.LOOP_END) {
            data.add(((LoopEndState) s).loopBackState.stateNumber);
        } else if (s instanceof BlockStartState) {
            data.add(((BlockStartState) s).endState.stateNumber);
        }
        if (s.getStateType() != ATNState.RULE_STOP) {
            // the deserializer can trivially derive these edges, so there's no need to serialize them
            nedges += s.getNumberOfTransitions();
        }
        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
            Transition t = s.transition(i);
            int edgeType = Transition.serializationTypes.get(t.getClass());
            if (edgeType == Transition.SET || edgeType == Transition.NOT_SET) {
                SetTransition st = (SetTransition) t;
                sets.put(st.set, true);
            }
        }
    }
    // non-greedy states
    data.add(nonGreedyStates.size());
    for (int i = 0; i < nonGreedyStates.size(); i++) {
        data.add(nonGreedyStates.get(i));
    }
    // precedence states
    data.add(precedenceStates.size());
    for (int i = 0; i < precedenceStates.size(); i++) {
        data.add(precedenceStates.get(i));
    }
    int nrules = atn.ruleToStartState.length;
    data.add(nrules);
    for (int r = 0; r < nrules; r++) {
        ATNState ruleStartState = atn.ruleToStartState[r];
        data.add(ruleStartState.stateNumber);
        if (atn.grammarType == ATNType.LEXER) {
            if (atn.ruleToTokenType[r] == Token.EOF) {
                data.add(Character.MAX_VALUE);
            } else {
                data.add(atn.ruleToTokenType[r]);
            }
        }
    }
    int nmodes = atn.modeToStartState.size();
    data.add(nmodes);
    if (nmodes > 0) {
        for (ATNState modeStartState : atn.modeToStartState) {
            data.add(modeStartState.stateNumber);
        }
    }
    List<IntervalSet> bmpSets = new ArrayList<>();
    List<IntervalSet> smpSets = new ArrayList<>();
    for (IntervalSet set : sets.keySet()) {
        if (set.getMaxElement() <= Character.MAX_VALUE) {
            bmpSets.add(set);
        } else {
            smpSets.add(set);
        }
    }
    serializeSets(data, bmpSets, new CodePointSerializer() {

        @Override
        public void serializeCodePoint(IntegerList data, int cp) {
            data.add(cp);
        }
    });
    serializeSets(data, smpSets, new CodePointSerializer() {

        @Override
        public void serializeCodePoint(IntegerList data, int cp) {
            serializeInt(data, cp);
        }
    });
    Map<IntervalSet, Integer> setIndices = new HashMap<>();
    int setIndex = 0;
    for (IntervalSet bmpSet : bmpSets) {
        setIndices.put(bmpSet, setIndex++);
    }
    for (IntervalSet smpSet : smpSets) {
        setIndices.put(smpSet, setIndex++);
    }
    data.add(nedges);
    for (ATNState s : atn.states) {
        if (s == null) {
            // might be optimized away
            continue;
        }
        if (s.getStateType() == ATNState.RULE_STOP) {
            continue;
        }
        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
            Transition t = s.transition(i);
            if (atn.states.get(t.target.stateNumber) == null) {
                throw new IllegalStateException("Cannot serialize a transition to a removed state.");
            }
            int src = s.stateNumber;
            int trg = t.target.stateNumber;
            int edgeType = Transition.serializationTypes.get(t.getClass());
            int arg1 = 0;
            int arg2 = 0;
            int arg3 = 0;
            switch(edgeType) {
                case Transition.RULE:
                    trg = ((RuleTransition) t).followState.stateNumber;
                    arg1 = ((RuleTransition) t).target.stateNumber;
                    arg2 = ((RuleTransition) t).ruleIndex;
                    arg3 = ((RuleTransition) t).precedence;
                    break;
                case Transition.PRECEDENCE:
                    PrecedencePredicateTransition ppt = (PrecedencePredicateTransition) t;
                    arg1 = ppt.precedence;
                    break;
                case Transition.PREDICATE:
                    PredicateTransition pt = (PredicateTransition) t;
                    arg1 = pt.ruleIndex;
                    arg2 = pt.predIndex;
                    arg3 = pt.isCtxDependent ? 1 : 0;
                    break;
                case Transition.RANGE:
                    arg1 = ((RangeTransition) t).from;
                    arg2 = ((RangeTransition) t).to;
                    if (arg1 == Token.EOF) {
                        arg1 = 0;
                        arg3 = 1;
                    }
                    break;
                case Transition.ATOM:
                    arg1 = ((AtomTransition) t).label;
                    if (arg1 == Token.EOF) {
                        arg1 = 0;
                        arg3 = 1;
                    }
                    break;
                case Transition.ACTION:
                    ActionTransition at = (ActionTransition) t;
                    arg1 = at.ruleIndex;
                    arg2 = at.actionIndex;
                    if (arg2 == -1) {
                        arg2 = 0xFFFF;
                    }
                    arg3 = at.isCtxDependent ? 1 : 0;
                    break;
                case Transition.SET:
                    arg1 = setIndices.get(((SetTransition) t).set);
                    break;
                case Transition.NOT_SET:
                    arg1 = setIndices.get(((SetTransition) t).set);
                    break;
                case Transition.WILDCARD:
                    break;
            }
            data.add(src);
            data.add(trg);
            data.add(edgeType);
            data.add(arg1);
            data.add(arg2);
            data.add(arg3);
        }
    }
    int ndecisions = atn.decisionToState.size();
    data.add(ndecisions);
    for (DecisionState decStartState : atn.decisionToState) {
        data.add(decStartState.stateNumber);
    }
    //
    if (atn.grammarType == ATNType.LEXER) {
        data.add(atn.lexerActions.length);
        for (LexerAction action : atn.lexerActions) {
            data.add(action.getActionType().ordinal());
            switch(action.getActionType()) {
                case CHANNEL:
                    int channel = ((LexerChannelAction) action).getChannel();
                    data.add(channel != -1 ? channel : 0xFFFF);
                    data.add(0);
                    break;
                case CUSTOM:
                    int ruleIndex = ((LexerCustomAction) action).getRuleIndex();
                    int actionIndex = ((LexerCustomAction) action).getActionIndex();
                    data.add(ruleIndex != -1 ? ruleIndex : 0xFFFF);
                    data.add(actionIndex != -1 ? actionIndex : 0xFFFF);
                    break;
                case MODE:
                    int mode = ((LexerModeAction) action).getMode();
                    data.add(mode != -1 ? mode : 0xFFFF);
                    data.add(0);
                    break;
                case MORE:
                    data.add(0);
                    data.add(0);
                    break;
                case POP_MODE:
                    data.add(0);
                    data.add(0);
                    break;
                case PUSH_MODE:
                    mode = ((LexerPushModeAction) action).getMode();
                    data.add(mode != -1 ? mode : 0xFFFF);
                    data.add(0);
                    break;
                case SKIP:
                    data.add(0);
                    data.add(0);
                    break;
                case TYPE:
                    int type = ((LexerTypeAction) action).getType();
                    data.add(type != -1 ? type : 0xFFFF);
                    data.add(0);
                    break;
                default:
                    String message = String.format(Locale.getDefault(), "The specified lexer action type %s is not valid.", action.getActionType());
                    throw new IllegalArgumentException(message);
            }
        }
    }
    // don't adjust the first value since that's the version number
    for (int i = 1; i < data.size(); i++) {
        if (data.get(i) < Character.MIN_VALUE || data.get(i) > Character.MAX_VALUE) {
            throw new UnsupportedOperationException("Serialized ATN data element " + data.get(i) + " element " + i + " out of range " + (int) Character.MIN_VALUE + ".." + (int) Character.MAX_VALUE);
        }
        int value = (data.get(i) + 2) & 0xFFFF;
        data.set(i, value);
    }
    return data;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) IntegerList(org.antlr.v4.runtime.misc.IntegerList) IntervalSet(org.antlr.v4.runtime.misc.IntervalSet)

Example 7 with Token

use of org.antlr.v4.runtime.Token in project antlr4 by antlr.

the class LL1Analyzer method LOOK.

/**
	 * Compute set of tokens that can follow {@code s} in the ATN in the
	 * specified {@code ctx}.
	 *
	 * <p>If {@code ctx} is {@code null} and the end of the rule containing
	 * {@code s} is reached, {@link Token#EPSILON} is added to the result set.
	 * If {@code ctx} is not {@code null} and the end of the outermost rule is
	 * reached, {@link Token#EOF} is added to the result set.</p>
	 *
	 * @param s the ATN state
	 * @param stopState the ATN state to stop at. This can be a
	 * {@link BlockEndState} to detect epsilon paths through a closure.
	 * @param ctx the complete parser context, or {@code null} if the context
	 * should be ignored
	 *
	 * @return The set of tokens that can follow {@code s} in the ATN in the
	 * specified {@code ctx}.
	 */
public IntervalSet LOOK(ATNState s, ATNState stopState, RuleContext ctx) {
    IntervalSet r = new IntervalSet();
    // ignore preds; get all lookahead
    boolean seeThruPreds = true;
    PredictionContext lookContext = ctx != null ? PredictionContext.fromRuleContext(s.atn, ctx) : null;
    _LOOK(s, stopState, lookContext, r, new HashSet<ATNConfig>(), new BitSet(), seeThruPreds, true);
    return r;
}
Also used : IntervalSet(org.antlr.v4.runtime.misc.IntervalSet) BitSet(java.util.BitSet)

Example 8 with Token

use of org.antlr.v4.runtime.Token in project antlr4 by antlr.

the class LexerATNSimulator method execATN.

protected int execATN(CharStream input, DFAState ds0) {
    //System.out.println("enter exec index "+input.index()+" from "+ds0.configs);
    if (debug) {
        System.out.format(Locale.getDefault(), "start state closure=%s\n", ds0.configs);
    }
    if (ds0.isAcceptState) {
        // allow zero-length tokens
        captureSimState(prevAccept, input, ds0);
    }
    int t = input.LA(1);
    // s is current/from DFA state
    DFAState s = ds0;
    while (true) {
        // while more work
        if (debug) {
            System.out.format(Locale.getDefault(), "execATN loop starting closure: %s\n", s.configs);
        }
        // As we move src->trg, src->trg, we keep track of the previous trg to
        // avoid looking up the DFA state again, which is expensive.
        // If the previous target was already part of the DFA, we might
        // be able to avoid doing a reach operation upon t. If s!=null,
        // it means that semantic predicates didn't prevent us from
        // creating a DFA state. Once we know s!=null, we check to see if
        // the DFA state has an edge already for t. If so, we can just reuse
        // it's configuration set; there's no point in re-computing it.
        // This is kind of like doing DFA simulation within the ATN
        // simulation because DFA simulation is really just a way to avoid
        // computing reach/closure sets. Technically, once we know that
        // we have a previously added DFA state, we could jump over to
        // the DFA simulator. But, that would mean popping back and forth
        // a lot and making things more complicated algorithmically.
        // This optimization makes a lot of sense for loops within DFA.
        // A character will take us back to an existing DFA state
        // that already has lots of edges out of it. e.g., .* in comments.
        DFAState target = getExistingTargetState(s, t);
        if (target == null) {
            target = computeTargetState(input, s, t);
        }
        if (target == ERROR) {
            break;
        }
        // end of the token.
        if (t != IntStream.EOF) {
            consume(input);
        }
        if (target.isAcceptState) {
            captureSimState(prevAccept, input, target);
            if (t == IntStream.EOF) {
                break;
            }
        }
        t = input.LA(1);
        // flip; current DFA target becomes new src/from state
        s = target;
    }
    return failOrAccept(prevAccept, input, s.configs, t);
}
Also used : DFAState(org.antlr.v4.runtime.dfa.DFAState)

Example 9 with Token

use of org.antlr.v4.runtime.Token in project antlr4 by antlr.

the class ParseTreePatternMatcher method matchImpl.

// ---- SUPPORT CODE ----
/**
	 * Recursively walk {@code tree} against {@code patternTree}, filling
	 * {@code match.}{@link ParseTreeMatch#labels labels}.
	 *
	 * @return the first node encountered in {@code tree} which does not match
	 * a corresponding node in {@code patternTree}, or {@code null} if the match
	 * was successful. The specific node returned depends on the matching
	 * algorithm used by the implementation, and may be overridden.
	 */
protected ParseTree matchImpl(ParseTree tree, ParseTree patternTree, MultiMap<String, ParseTree> labels) {
    if (tree == null) {
        throw new IllegalArgumentException("tree cannot be null");
    }
    if (patternTree == null) {
        throw new IllegalArgumentException("patternTree cannot be null");
    }
    // x and <ID>, x and y, or x and x; or could be mismatched types
    if (tree instanceof TerminalNode && patternTree instanceof TerminalNode) {
        TerminalNode t1 = (TerminalNode) tree;
        TerminalNode t2 = (TerminalNode) patternTree;
        ParseTree mismatchedNode = null;
        // both are tokens and they have same type
        if (t1.getSymbol().getType() == t2.getSymbol().getType()) {
            if (t2.getSymbol() instanceof TokenTagToken) {
                // x and <ID>
                TokenTagToken tokenTagToken = (TokenTagToken) t2.getSymbol();
                // track label->list-of-nodes for both token name and label (if any)
                labels.map(tokenTagToken.getTokenName(), tree);
                if (tokenTagToken.getLabel() != null) {
                    labels.map(tokenTagToken.getLabel(), tree);
                }
            } else if (t1.getText().equals(t2.getText())) {
            // x and x
            } else {
                // x and y
                if (mismatchedNode == null) {
                    mismatchedNode = t1;
                }
            }
        } else {
            if (mismatchedNode == null) {
                mismatchedNode = t1;
            }
        }
        return mismatchedNode;
    }
    if (tree instanceof ParserRuleContext && patternTree instanceof ParserRuleContext) {
        ParserRuleContext r1 = (ParserRuleContext) tree;
        ParserRuleContext r2 = (ParserRuleContext) patternTree;
        ParseTree mismatchedNode = null;
        // (expr ...) and <expr>
        RuleTagToken ruleTagToken = getRuleTagToken(r2);
        if (ruleTagToken != null) {
            ParseTreeMatch m = null;
            if (r1.getRuleContext().getRuleIndex() == r2.getRuleContext().getRuleIndex()) {
                // track label->list-of-nodes for both rule name and label (if any)
                labels.map(ruleTagToken.getRuleName(), tree);
                if (ruleTagToken.getLabel() != null) {
                    labels.map(ruleTagToken.getLabel(), tree);
                }
            } else {
                if (mismatchedNode == null) {
                    mismatchedNode = r1;
                }
            }
            return mismatchedNode;
        }
        // (expr ...) and (expr ...)
        if (r1.getChildCount() != r2.getChildCount()) {
            if (mismatchedNode == null) {
                mismatchedNode = r1;
            }
            return mismatchedNode;
        }
        int n = r1.getChildCount();
        for (int i = 0; i < n; i++) {
            ParseTree childMatch = matchImpl(r1.getChild(i), patternTree.getChild(i), labels);
            if (childMatch != null) {
                return childMatch;
            }
        }
        return mismatchedNode;
    }
    // if nodes aren't both tokens or both rule nodes, can't match
    return tree;
}
Also used : ParserRuleContext(org.antlr.v4.runtime.ParserRuleContext) TerminalNode(org.antlr.v4.runtime.tree.TerminalNode) ParseTree(org.antlr.v4.runtime.tree.ParseTree)

Example 10 with Token

use of org.antlr.v4.runtime.Token in project antlr4 by antlr.

the class ParseTreePatternMatcher method tokenize.

public List<? extends Token> tokenize(String pattern) {
    // split pattern into chunks: sea (raw input) and islands (<ID>, <expr>)
    List<Chunk> chunks = split(pattern);
    // create token stream from text and tags
    List<Token> tokens = new ArrayList<Token>();
    for (Chunk chunk : chunks) {
        if (chunk instanceof TagChunk) {
            TagChunk tagChunk = (TagChunk) chunk;
            // add special rule token or conjure up new token from name
            if (Character.isUpperCase(tagChunk.getTag().charAt(0))) {
                Integer ttype = parser.getTokenType(tagChunk.getTag());
                if (ttype == Token.INVALID_TYPE) {
                    throw new IllegalArgumentException("Unknown token " + tagChunk.getTag() + " in pattern: " + pattern);
                }
                TokenTagToken t = new TokenTagToken(tagChunk.getTag(), ttype, tagChunk.getLabel());
                tokens.add(t);
            } else if (Character.isLowerCase(tagChunk.getTag().charAt(0))) {
                int ruleIndex = parser.getRuleIndex(tagChunk.getTag());
                if (ruleIndex == -1) {
                    throw new IllegalArgumentException("Unknown rule " + tagChunk.getTag() + " in pattern: " + pattern);
                }
                int ruleImaginaryTokenType = parser.getATNWithBypassAlts().ruleToTokenType[ruleIndex];
                tokens.add(new RuleTagToken(tagChunk.getTag(), ruleImaginaryTokenType, tagChunk.getLabel()));
            } else {
                throw new IllegalArgumentException("invalid tag: " + tagChunk.getTag() + " in pattern: " + pattern);
            }
        } else {
            TextChunk textChunk = (TextChunk) chunk;
            ANTLRInputStream in = new ANTLRInputStream(textChunk.getText());
            lexer.setInputStream(in);
            Token t = lexer.nextToken();
            while (t.getType() != Token.EOF) {
                tokens.add(t);
                t = lexer.nextToken();
            }
        }
    }
    //		System.out.println("tokens="+tokens);
    return tokens;
}
Also used : ArrayList(java.util.ArrayList) Token(org.antlr.v4.runtime.Token) ANTLRInputStream(org.antlr.v4.runtime.ANTLRInputStream)

Aggregations

Token (org.antlr.v4.runtime.Token)37 Test (org.junit.Test)26 GrammarAST (org.antlr.v4.tool.ast.GrammarAST)18 IntervalSet (org.antlr.v4.runtime.misc.IntervalSet)16 ArrayList (java.util.ArrayList)14 ANTLRInputStream (org.antlr.v4.runtime.ANTLRInputStream)12 Grammar (org.antlr.v4.tool.Grammar)12 LexerGrammar (org.antlr.v4.tool.LexerGrammar)12 Token (org.antlr.runtime.Token)10 CommonTokenStream (org.antlr.v4.runtime.CommonTokenStream)10 TerminalNode (org.antlr.v4.runtime.tree.TerminalNode)10 CharStream (org.antlr.v4.runtime.CharStream)9 CommonToken (org.antlr.v4.runtime.CommonToken)8 ParserRuleContext (org.antlr.v4.runtime.ParserRuleContext)8 ParseTree (org.antlr.v4.runtime.tree.ParseTree)8 Rule (org.antlr.v4.tool.Rule)8 LexerInterpreter (org.antlr.v4.runtime.LexerInterpreter)7 TokenSource (org.antlr.v4.runtime.TokenSource)7 StringReader (java.io.StringReader)6 BaseRuntimeTest (org.antlr.v4.test.runtime.BaseRuntimeTest)6