Search in sources:

Example 1 with BreadthFirstSearch

use of priv.bajdcc.util.lexer.automata.BreadthFirstSearch in project jMiniLang by bajdcc.

the class NPA method generateNPA.

/**
 * Generates the pushdown automaton (NPA) from the grammar automata (NGA)
 * stored in {@code mapNGA}.
 * <p>
 * First pass: for every rule item in {@code mapNGA}, the rule item is appended
 * to {@code arrRuleItems}, all NGA statuses of its closure are collected, every
 * RULE edge is indexed by the rule it references, and one NPA status is created
 * per NGA status (the two local lists are kept index-aligned).
 * <p>
 * Second pass: for every status that is not purely left recursive, SHIFT and
 * MOVE edges are created from its outgoing NGA edges; for final statuses
 * REDUCE / LEFT_RECURSION edges are created for each NGA edge that references
 * the owning rule, plus a FINISH edge when the rule is the start rule.
 * Initial statuses of the start rule are added to {@code arrInitStatusList}.
 */
private void generateNPA() {
    /* NPA (pushdown automaton) statuses */
    ArrayList<NPAStatus> NPAStatusList = new ArrayList<>();
    /* NGA (grammar automaton) statuses, index-aligned with NPAStatusList */
    ArrayList<NGAStatus> NGAStatusList = new ArrayList<>();
    /* Maps each rule to the NGA edges that reference it */
    HashMap<Rule, ArrayList<NGAEdge>> ruleEdgeMap = new HashMap<>();
    /* Iterate over every rule item */
    for (Entry<RuleItem, NGAStatus> entry : mapNGA.entrySet()) {
        RuleItem key = entry.getKey();
        NGAStatus value = entry.getValue();
        /* Record the rule item */
        arrRuleItems.add(key);
        /* Collect all statuses belonging to the current rule item */
        ArrayList<NGAStatus> CurrentNGAStatusList = getNGAStatusClosure(new BreadthFirstSearch<>(), value);
        /* Scan all edges */
        for (NGAStatus status : CurrentNGAStatusList) {
            /* A non-terminal (RULE) edge is added to the adjacency table: rule -> all edges carrying that rule */
            status.outEdges.stream().filter(edge -> edge.data.kAction == NGAEdgeType.RULE).forEach(edge -> {
                Rule rule = edge.data.rule.rule;
                if (!ruleEdgeMap.containsKey(rule)) {
                    ruleEdgeMap.put(rule, new ArrayList<>());
                }
                ruleEdgeMap.get(rule).add(edge);
            });
        }
        /* Create an NPA status for every NGA status (one-to-one) */
        for (NGAStatus status : CurrentNGAStatusList) {
            /* Record the NGA status */
            NGAStatusList.add(status);
            /* Create the matching NPA status */
            NPAStatus NPAStatus = new NPAStatus();
            NPAStatus.data.label = status.data.label;
            NPAStatus.data.iRuleItem = arrRuleItems.indexOf(key);
            NPAStatusList.add(NPAStatus);
        }
    }
    /* Iterate over all NPA statuses */
    for (int i = 0; i < NPAStatusList.size(); i++) {
        /* Fetch the NGA status */
        NGAStatus ngaStatus = NGAStatusList.get(i);
        /* Fetch the NPA status at the same index */
        NPAStatus npaStatus = NPAStatusList.get(i);
        /* Fetch the rule item this status belongs to */
        RuleItem ruleItem = arrRuleItems.get(npaStatus.data.iRuleItem);
        /* Check for pure left recursion such as [A ::= A a]; such edges cannot be added directly, they need look-ahead and reduction */
        if (!isLeftRecursiveStatus(ngaStatus, ruleItem.parent)) {
            /* Whether the production belongs to the start rule; combined below with the "no NGA in-edges" test to detect an initial status */
            boolean isInitRuleStatus = initRule == ruleItem.parent;
            /* If so, add the corresponding NPA status to the initial status list */
            if (ngaStatus.inEdges.isEmpty() && isInitRuleStatus) {
                arrInitStatusList.add(npaStatus);
            }
            /* Token set used for priority: holds First-set tokens of the Rule/Token edges already seen from this status (earlier edges win the look-ahead) */
            HashSet<Integer> tokenSet = new HashSet<>();
            /* Iterate over all outgoing edges of the grammar automaton status */
            for (NGAEdge edge : ngaStatus.outEdges) {
                switch(edge.data.kAction) {
                    case EPSILON:
                        break;
                    case RULE:
                        /* Skip edges that are purely left recursive */
                        if (!isLeftRecursiveEdge(edge, ruleItem.parent)) {
                            for (RuleItem item : edge.data.rule.rule.arrRules) {
                                /* Start status of the referenced rule item */
                                NGAStatus initItemStatus = mapNGA.get(item);
                                /* Skip start statuses that are purely left recursive */
                                if (!isLeftRecursiveStatus(initItemStatus, item.parent)) {
                                    /* Add a Shift edge, which pushes a status index onto the top of the stack */
                                    NPAEdge npaEdge = connect(npaStatus, NPAStatusList.get(NGAStatusList.indexOf(initItemStatus)));
                                    npaEdge.data.handler = edge.data.handler;
                                    npaEdge.data.action = edge.data.action;
                                    npaEdge.data.kAction = NPAEdgeType.SHIFT;
                                    npaEdge.data.inst = NPAInstruction.SHIFT;
                                    npaEdge.data.errorJump = NPAStatusList.get(NGAStatusList.indexOf(edge.end));
                                    /* Build the look-ahead table for the shift item; LA consumes no token, it only pushes a new status (used for reduction) */
                                    npaEdge.data.arrLookAhead = new HashSet<>();
                                    npaEdge.data.arrLookAhead.addAll(item.setFirstSetTokens.stream().filter(exp -> !tokenSet.contains(exp.id)).map(exp -> exp.id).collect(Collectors.toList()));
                                }
                            }
                            // Add all First-set terminals of this non-terminal to tokenSet so later edges' look-ahead respects priority
                            tokenSet.addAll(edge.data.rule.rule.arrTokens.stream().map(exp -> exp.id).collect(Collectors.toList()));
                        }
                        break;
                    case TOKEN:
                        /* Add a Move edge, which consumes (matches) one terminal; a mismatch is an error (input does not conform to the grammar) */
                        NPAEdge npaEdge = connect(npaStatus, NPAStatusList.get(NGAStatusList.indexOf(edge.end)));
                        npaEdge.data.handler = edge.data.handler;
                        npaEdge.data.action = edge.data.action;
                        npaEdge.data.kAction = NPAEdgeType.MOVE;
                        npaEdge.data.iToken = edge.data.token.id;
                        npaEdge.data.iHandler = arrActions.indexOf(edge.data.action);
                        npaEdge.data.errorJump = npaEdge.end;
                        /* Choose the instruction according to the storage id (-1 means no storage slot) */
                        if (edge.data.iStorage != -1) {
                            npaEdge.data.inst = NPAInstruction.READ;
                            // instruction parameter: the storage index
                            npaEdge.data.iIndex = edge.data.iStorage;
                        } else {
                            npaEdge.data.inst = NPAInstruction.PASS;
                        }
                        /* Update tokenSet */
                        if (tokenSet.contains(edge.data.token.id)) {
                            /* Token already claimed by an earlier edge: attach an (empty) look-ahead table */
                            npaEdge.data.arrLookAhead = new HashSet<>();
                        } else {
                            tokenSet.add(edge.data.token.id);
                        }
                        break;
                    default:
                        break;
                }
            }
            /* If the current NGA status is final (a reduction is due), check whether further edges are needed */
            if (ngaStatus.data.bFinal) {
                if (ruleEdgeMap.containsKey(ruleItem.parent)) {
                    /* Iterate over the NGA edges that carry the rule owning the current rule item */
                    ArrayList<NGAEdge> ruleEdges = ruleEdgeMap.get(// the non-terminal being reduced is A; fetch all edges that reference A
                    ruleItem.parent);
                    for (NGAEdge ngaEdge : ruleEdges) {
                        /* Check pure left recursion; the lengthy expression obtains the head non-terminal of the production containing this edge */
                        if (isLeftRecursiveEdge(ngaEdge, arrRuleItems.get(NPAStatusList.get(NGAStatusList.indexOf(ngaEdge.begin)).data.iRuleItem).parent)) {
                            /* Add a Left Recursion edge (a special kind of Reduce edge) */
                            NPAEdge npaEdge = connect(npaStatus, NPAStatusList.get(NGAStatusList.indexOf(ngaEdge.end)));
                            npaEdge.data.kAction = NPAEdgeType.LEFT_RECURSION;
                            if (ngaEdge.data.iStorage != -1) {
                                npaEdge.data.inst = NPAInstruction.LEFT_RECURSION;
                                npaEdge.data.iIndex = ngaEdge.data.iStorage;
                            } else {
                                npaEdge.data.inst = NPAInstruction.LEFT_RECURSION_DISCARD;
                            }
                            // the rule item being reduced
                            npaEdge.data.iHandler = npaStatus.data.iRuleItem;
                            /* Build the look-ahead table (Follow set) for left recursion; if LA succeeds, left recursion is entered */
                            npaEdge.data.arrLookAhead = new HashSet<>();
                            for (NGAEdge edge : ngaEdge.end.outEdges) {
                                /* A terminal out-edge is added directly (the First set of a terminal is itself) */
                                if (edge.data.kAction == NGAEdgeType.TOKEN) {
                                    npaEdge.data.arrLookAhead.add(edge.data.token.id);
                                } else {
                                    /* A non-terminal out-edge contributes the First set of that non-terminal */
                                    // NOTE(review): this branch runs for every non-TOKEN edge; if EPSILON edges can reach here, confirm edge.data.rule is non-null
                                    npaEdge.data.arrLookAhead.addAll(edge.data.rule.rule.arrTokens.stream().map(exp -> exp.id).collect(Collectors.toList()));
                                }
                            }
                        } else {
                            /* Add a Reduce edge */
                            NPAEdge npaEdge = connect(npaStatus, NPAStatusList.get(NGAStatusList.indexOf(ngaEdge.end)));
                            npaEdge.data.kAction = NPAEdgeType.REDUCE;
                            npaEdge.data.status = NPAStatusList.get(NGAStatusList.indexOf(ngaEdge.begin));
                            if (ngaEdge.data.iStorage != -1) {
                                npaEdge.data.inst = NPAInstruction.TRANSLATE;
                                npaEdge.data.iIndex = ngaEdge.data.iStorage;
                            } else {
                                npaEdge.data.inst = NPAInstruction.TRANSLATE_DISCARD;
                            }
                            // the rule item being reduced
                            npaEdge.data.iHandler = npaStatus.data.iRuleItem;
                        }
                    }
                }
                if (isInitRuleStatus) {
                    /* Add a Finish edge (a self-loop on the final status of the start rule) */
                    NPAEdge npaEdge = connect(npaStatus, npaStatus);
                    npaEdge.data.kAction = NPAEdgeType.FINISH;
                    npaEdge.data.inst = NPAInstruction.TRANSLATE_FINISH;
                    npaEdge.data.iHandler = npaStatus.data.iRuleItem;
                }
            }
        }
    }
}
Also used : Iterator(java.util.Iterator) ISemanticAction(priv.bajdcc.LALR1.semantic.token.ISemanticAction) RuleItem(priv.bajdcc.LALR1.syntax.rule.RuleItem) HashMap(java.util.HashMap) Collectors(java.util.stream.Collectors) BreadthFirstSearch(priv.bajdcc.util.lexer.automata.BreadthFirstSearch) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) NGA(priv.bajdcc.LALR1.syntax.automata.nga.NGA) RuleExp(priv.bajdcc.LALR1.syntax.exp.RuleExp) NGAEdge(priv.bajdcc.LALR1.syntax.automata.nga.NGAEdge) NGAStatus(priv.bajdcc.LALR1.syntax.automata.nga.NGAStatus) TokenExp(priv.bajdcc.LALR1.syntax.exp.TokenExp) Entry(java.util.Map.Entry) Rule(priv.bajdcc.LALR1.syntax.rule.Rule) NGAEdgeType(priv.bajdcc.LALR1.syntax.automata.nga.NGAEdgeType) HashMap(java.util.HashMap) NGAEdge(priv.bajdcc.LALR1.syntax.automata.nga.NGAEdge) ArrayList(java.util.ArrayList) NGAStatus(priv.bajdcc.LALR1.syntax.automata.nga.NGAStatus) RuleItem(priv.bajdcc.LALR1.syntax.rule.RuleItem) Rule(priv.bajdcc.LALR1.syntax.rule.Rule) HashSet(java.util.HashSet)

Example 2 with BreadthFirstSearch

use of priv.bajdcc.util.lexer.automata.BreadthFirstSearch in project jMiniLang by bajdcc.

the class NGAStatus method visit.

/**
 * Walks every status connected to this one (this status included) in
 * first-in-first-out order and collects them in {@code bfs.arrStatus}.
 *
 * @param bfs
 *            traversal strategy; provides the result list, the edge filter
 *            ({@code testEdge}) and the {@code visitBegin}/{@code visitEnd}
 *            callbacks
 */
public void visit(BreadthFirstSearch<NGAEdge, NGAStatus> bfs) {
    ArrayList<NGAStatus> worklist = bfs.arrStatus;
    HashSet<NGAStatus> discovered = new HashSet<>();
    worklist.clear();
    discovered.add(this);
    worklist.add(this);
    int cursor = 0;
    // The worklist grows while it is being consumed, hence the index-based walk.
    while (cursor < worklist.size()) {
        NGAStatus current = worklist.get(cursor);
        cursor++;
        VisitBag bag = new VisitBag();
        bfs.visitBegin(current, bag);
        if (bag.bVisitChildren) {
            for (NGAEdge edge : current.outEdges) {
                // Skip targets that were already discovered.
                if (discovered.contains(edge.end)) {
                    continue;
                }
                // Skip edges rejected by the traversal strategy.
                if (!bfs.testEdge(edge)) {
                    continue;
                }
                worklist.add(edge.end);
                discovered.add(edge.end);
            }
        }
        if (bag.bVisitEnd) {
            bfs.visitEnd(current);
        }
    }
}
Also used : HashSet(java.util.HashSet) VisitBag(priv.bajdcc.util.VisitBag) BreadthFirstSearch(priv.bajdcc.util.lexer.automata.BreadthFirstSearch) ArrayList(java.util.ArrayList) VisitBag(priv.bajdcc.util.VisitBag) HashSet(java.util.HashSet)

Example 3 with BreadthFirstSearch

use of priv.bajdcc.util.lexer.automata.BreadthFirstSearch in project jMiniLang by bajdcc.

the class NFAStatus method visit.

/**
 * Walks every status connected to this one (this status included) in
 * first-in-first-out order and collects them in {@code bfs.arrStatus}.
 *
 * @param bfs
 *            traversal strategy; provides the result list, the edge filter
 *            ({@code testEdge}) and the {@code visitBegin}/{@code visitEnd}
 *            callbacks
 */
public void visit(BreadthFirstSearch<NFAEdge, NFAStatus> bfs) {
    ArrayList<NFAStatus> worklist = bfs.arrStatus;
    HashSet<NFAStatus> discovered = new HashSet<>();
    worklist.clear();
    discovered.add(this);
    worklist.add(this);
    int cursor = 0;
    // The worklist grows while it is being consumed, hence the index-based walk.
    while (cursor < worklist.size()) {
        NFAStatus current = worklist.get(cursor);
        cursor++;
        VisitBag bag = new VisitBag();
        bfs.visitBegin(current, bag);
        if (bag.bVisitChildren) {
            for (NFAEdge edge : current.outEdges) {
                // Skip targets that were already discovered.
                if (discovered.contains(edge.end)) {
                    continue;
                }
                // Skip edges rejected by the traversal strategy.
                if (!bfs.testEdge(edge)) {
                    continue;
                }
                worklist.add(edge.end);
                discovered.add(edge.end);
            }
        }
        if (bag.bVisitEnd) {
            bfs.visitEnd(current);
        }
    }
}
Also used : HashSet(java.util.HashSet) VisitBag(priv.bajdcc.util.VisitBag) BreadthFirstSearch(priv.bajdcc.util.lexer.automata.BreadthFirstSearch) ArrayList(java.util.ArrayList) VisitBag(priv.bajdcc.util.VisitBag) HashSet(java.util.HashSet)

Example 4 with BreadthFirstSearch

use of priv.bajdcc.util.lexer.automata.BreadthFirstSearch in project jMiniLang by bajdcc.

the class DFA method deleteEpsilonEdges.

/**
 * Removes all epsilon edges from the NFA reachable from {@code nfa.begin}.
 * <p>
 * Steps, in order: (1) mark as "unreachable" every status whose in-edges are
 * all epsilon edges (a status with no in-edges is marked too), except the
 * begin status; (2) for every remaining status, copy the non-epsilon
 * out-edges of its epsilon closure onto it and propagate the final flag;
 * (3) delete every epsilon edge; (4) disconnect the unreachable statuses;
 * (5) delete duplicate out-edges (same end, action and param).
 */
private void deleteEpsilonEdges() {
    ArrayList<NFAStatus> NFAStatusList = getNFAStatusClosure(new BreadthFirstSearch<>(), // fetch the status closure starting at the begin status
    nfa.begin);
    // Statuses considered unreachable once epsilon edges are gone
    ArrayList<NFAStatus> unaccessiableList = new ArrayList<>();
    for (NFAStatus status : NFAStatusList) {
        boolean epsilon = true;
        for (NFAEdge edge : status.inEdges) {
            if (edge.data.kAction != EdgeType.EPSILON) {
                // Found a non-epsilon in-edge,
                // so this status stays reachable
                epsilon = false;
                break;
            }
        }
        if (epsilon) {
            // All in-edges are epsilon edges (or there are none): unreachable
            unaccessiableList.add(status);
        }
    }
    // The begin status is always kept as valid
    unaccessiableList.remove(nfa.begin);
    // Traversal strategy that only follows epsilon edges
    BreadthFirstSearch<NFAEdge, NFAStatus> epsilonBFS = new BreadthFirstSearch<NFAEdge, NFAStatus>() {

        @Override
        public boolean testEdge(NFAEdge edge) {
            return edge.data.kAction == EdgeType.EPSILON;
        }
    };
    /* For every valid status: take its epsilon closure (minus itself), inherit the final flag, and copy the closure's non-epsilon edges onto it */
    NFAStatusList.stream().filter(status -> !unaccessiableList.contains(status)).forEach(status -> {
        // Only valid (reachable) statuses get here
        /* Fetch the epsilon closure of the current status */
        ArrayList<NFAStatus> epsilonClosure = getNFAStatusClosure(epsilonBFS, status);
        /* Exclude the status itself */
        epsilonClosure.remove(status);
        /* Iterate over the statuses of the epsilon closure */
        for (NFAStatus epsilonStatus : epsilonClosure) {
            if (epsilonStatus.data.bFinal) {
                /* A final status in the closure makes the current status final */
                status.data.bFinal = true;
            }
            /* Iterate over the closure status's out-edges, keeping only non-epsilon ones */
            epsilonStatus.outEdges.stream().filter(edge -> edge.data.kAction != EdgeType.EPSILON).forEach(edge -> {
                /* Re-create the edge from the current status; the new edge shares the same data object */
                connect(status, edge.end).data = edge.data;
            });
        }
    });
    /* Delete the epsilon edges */
    for (NFAStatus status : NFAStatusList) {
        for (Iterator<NFAEdge> it = status.outEdges.iterator(); it.hasNext(); ) {
            NFAEdge edge = it.next();
            if (edge.data.kAction == EdgeType.EPSILON) {
                // Remove through the iterator first so the ongoing iteration stays valid
                it.remove();
                // Then detach the epsilon edge
                disconnect(status, edge);
            }
        }
    }
    /* Delete the invalid statuses */
    for (NFAStatus status : unaccessiableList) {
        // Drop the status from the local list
        NFAStatusList.remove(status);
        // Remove every edge attached to the status
        disconnect(status);
    }
    unaccessiableList.clear();
    /* Delete duplicate edges */
    for (NFAStatus status : NFAStatusList) {
        for (int i = 0; i < status.outEdges.size() - 1; i++) {
            NFAEdge edge1 = status.outEdges.get(i);
            // Compare edge1 with every later out-edge
            for (ListIterator<NFAEdge> it2 = status.outEdges.listIterator(i + 1); it2.hasNext(); ) {
                NFAEdge edge2 = it2.next();
                // Duplicate: same target, same action kind and same param
                if (edge1.end == edge2.end && edge1.data.kAction == edge2.data.kAction && edge1.data.param == edge2.data.param) {
                    it2.remove();
                    disconnect(status, edge2);
                }
            }
        }
    }
}
Also used : Iterator(java.util.Iterator) ListIterator(java.util.ListIterator) DFAStatus(priv.bajdcc.util.lexer.automata.dfa.DFAStatus) Collection(java.util.Collection) IRegexComponent(priv.bajdcc.util.lexer.regex.IRegexComponent) HashMap(java.util.HashMap) Collectors(java.util.stream.Collectors) BreadthFirstSearch(priv.bajdcc.util.lexer.automata.BreadthFirstSearch) ArrayList(java.util.ArrayList) EdgeType(priv.bajdcc.util.lexer.automata.EdgeType) NFAStatus(priv.bajdcc.util.lexer.automata.nfa.NFAStatus) NFAEdge(priv.bajdcc.util.lexer.automata.nfa.NFAEdge) NFA(priv.bajdcc.util.lexer.automata.nfa.NFA) BreadthFirstSearch(priv.bajdcc.util.lexer.automata.BreadthFirstSearch) NFAStatus(priv.bajdcc.util.lexer.automata.nfa.NFAStatus) ArrayList(java.util.ArrayList) NFAEdge(priv.bajdcc.util.lexer.automata.nfa.NFAEdge)

Example 5 with BreadthFirstSearch

use of priv.bajdcc.util.lexer.automata.BreadthFirstSearch in project jMiniLang by bajdcc.

the class DFAStatus method visit.

/**
 * Walks every status connected to this one (this status included) in
 * first-in-first-out order and collects them in {@code bfs.arrStatus}.
 *
 * @param bfs
 *            traversal strategy; provides the result list, the edge filter
 *            ({@code testEdge}) and the {@code visitBegin}/{@code visitEnd}
 *            callbacks
 */
public void visit(BreadthFirstSearch<DFAEdge, DFAStatus> bfs) {
    ArrayList<DFAStatus> worklist = bfs.arrStatus;
    HashSet<DFAStatus> discovered = new HashSet<>();
    worklist.clear();
    discovered.add(this);
    worklist.add(this);
    int cursor = 0;
    // The worklist grows while it is being consumed, hence the index-based walk.
    while (cursor < worklist.size()) {
        DFAStatus current = worklist.get(cursor);
        cursor++;
        VisitBag bag = new VisitBag();
        bfs.visitBegin(current, bag);
        if (bag.bVisitChildren) {
            for (DFAEdge edge : current.outEdges) {
                // Skip targets that were already discovered.
                if (discovered.contains(edge.end)) {
                    continue;
                }
                // Skip edges rejected by the traversal strategy.
                if (!bfs.testEdge(edge)) {
                    continue;
                }
                worklist.add(edge.end);
                discovered.add(edge.end);
            }
        }
        if (bag.bVisitEnd) {
            bfs.visitEnd(current);
        }
    }
}
Also used : HashSet(java.util.HashSet) VisitBag(priv.bajdcc.util.VisitBag) BreadthFirstSearch(priv.bajdcc.util.lexer.automata.BreadthFirstSearch) ArrayList(java.util.ArrayList) VisitBag(priv.bajdcc.util.VisitBag) HashSet(java.util.HashSet)

Aggregations

BreadthFirstSearch (priv.bajdcc.util.lexer.automata.BreadthFirstSearch): 7, ArrayList (java.util.ArrayList): 6, HashSet (java.util.HashSet): 5, VisitBag (priv.bajdcc.util.VisitBag): 4, HashMap (java.util.HashMap): 2, Iterator (java.util.Iterator): 2, Collectors (java.util.stream.Collectors): 2, NGAEdge (priv.bajdcc.LALR1.syntax.automata.nga.NGAEdge): 2, NGAStatus (priv.bajdcc.LALR1.syntax.automata.nga.NGAStatus): 2, Collection (java.util.Collection): 1, ListIterator (java.util.ListIterator): 1, Entry (java.util.Map.Entry): 1, ISemanticAction (priv.bajdcc.LALR1.semantic.token.ISemanticAction): 1, NGA (priv.bajdcc.LALR1.syntax.automata.nga.NGA): 1, NGAEdgeType (priv.bajdcc.LALR1.syntax.automata.nga.NGAEdgeType): 1, RuleExp (priv.bajdcc.LALR1.syntax.exp.RuleExp): 1, TokenExp (priv.bajdcc.LALR1.syntax.exp.TokenExp): 1, Rule (priv.bajdcc.LALR1.syntax.rule.Rule): 1, RuleItem (priv.bajdcc.LALR1.syntax.rule.RuleItem): 1, EdgeType (priv.bajdcc.util.lexer.automata.EdgeType): 1