Search in sources:

Example 1 with URLResourceLoader

Use of org.tinystruct.system.util.URLResourceLoader in project bible-online by m0ver.

Method start of class crawler.

/**
 * Downloads the King James pages of books 1..66 from wordplanet.org and
 * appends every verse that is not yet stored to the KJV table.
 *
 * <p>Each book page is cut down to the text between the "Chapter 1" heading
 * and the page footer marker. Verses are then picked out with a regular
 * expression; a verse numbered 1 marks the start of the next chapter. For
 * each chapter the rows already stored are queried once (highest
 * {@code part_id} first), and verses whose number is not greater than that
 * stored maximum are skipped.
 *
 * <p>A book whose page does not contain the expected markers is skipped with
 * a message on {@code System.err} instead of aborting the whole run with a
 * {@link StringIndexOutOfBoundsException}.
 *
 * @throws MalformedURLException if a book URL cannot be constructed
 */
public void start() throws MalformedURLException {
    final String chapterOneHeading = "<h3>Chapter 1</h3>";
    final String footerMarker = "<div class=\"alignRight ym-noprint\">";
    final Pattern versePattern = Pattern.compile("<span class=\"verse\" id=\"(\\d+)\">.*</span>\\s*(.*)(\r\n)*");
    try {
        for (int bookId = 1; bookId <= 66; bookId++) {
            URL url = new URL("https://www.wordplanet.org/kj/b" + (bookId < 10 ? "0" + bookId : bookId) + ".htm");
            StringBuilder content = new URLResourceLoader(url, true).getContent();

            // Both markers must be present and in order, otherwise substring()
            // would either throw or return an unrelated slice of the page.
            int headingAt = content.indexOf(chapterOneHeading);
            int textStart = headingAt + chapterOneHeading.length();
            int textEnd = content.lastIndexOf(footerMarker);
            if (headingAt < 0 || textEnd < textStart) {
                System.err.println("Skipping book " + bookId + ": expected page markers not found in " + url);
                continue;
            }
            String words = content.substring(textStart, textEnd).replaceAll("<br/>", "\r\n");

            // Carries the book/chapter currently being processed and runs the lookup query.
            KJV chapterProbe = new KJV();
            chapterProbe.setBookId(bookId);
            int nextChapterId = 1;
            // Rows already stored for the current chapter; reset for every book.
            Table stored = null;

            Matcher verses = versePattern.matcher(words);
            while (verses.find()) {
                int partId = Integer.parseInt(verses.group(1));
                if (partId == 1) {
                    // Verse 1 starts a new chapter: look up what the db already holds for it.
                    chapterProbe.setChapterId(nextChapterId);
                    nextChapterId++;
                    stored = chapterProbe.find("SELECT * FROM KJV WHERE book_id = ? and chapter_id = ? order by part_id desc", new Object[] { chapterProbe.getBookId(), chapterProbe.getChapterId() });
                }
                // The first row has the highest part_id; anything up to it is already stored.
                if (null != stored && !stored.isEmpty() && stored.firstElement().getFieldInfo("part_id").intValue() >= partId) {
                    continue;
                }
                KJV verse = new KJV();
                verse.setBookId(bookId);
                verse.setChapterId(chapterProbe.getChapterId());
                // Strip the leading verse-number span so only the verse text remains.
                String text = verses.group().replaceAll("<span class=\"verse\" id=\"" + partId + "\">" + partId + "</span>\\s*", "");
                verse.setContent(text);
                verse.setPartId(partId);
                verse.append();
                System.out.println(text);
            }
        }
    } catch (ApplicationException e) {
        // Best-effort crawl: report the failure and stop; verses appended so far are kept.
        e.printStackTrace();
    }
}
Also used : Pattern(java.util.regex.Pattern) custom.objects.bible(custom.objects.bible) Table(org.tinystruct.data.component.Table) Matcher(java.util.regex.Matcher) URL(java.net.URL) ApplicationException(org.tinystruct.ApplicationException) URLResourceLoader(org.tinystruct.system.util.URLResourceLoader) KJV(custom.objects.KJV) Row(org.tinystruct.data.component.Row)

Example 2 with URLResourceLoader

Use of org.tinystruct.system.util.URLResourceLoader in project bible-online by m0ver.

Method getWord of class sender.

/**
 * Looks up {@code word} in the Youdao dictionary service and returns the raw
 * response.
 *
 * <p>When no {@code "usr"} attribute is present in the HTTP session, the
 * response body is fetched and returned as-is. Otherwise the XML response is
 * parsed, a vocabulary record (word, phonetic symbol, interpretation,
 * reference link, date) is built from it, and that record is appended for the
 * user or, if the user already has a record for the same word, updated with
 * the current date. The parsed document is then returned as a string.
 *
 * @param word the word to look up
 * @return the dictionary response
 * @throws MalformedURLException if the lookup URL cannot be constructed
 * @throws ApplicationException if the parsed response has fewer than three
 *         child nodes, or if a called tinystruct operation fails
 */
public String getWord(String word) throws MalformedURLException, ApplicationException {
    // NOTE(review): word is concatenated into the query string without URL-encoding;
    // confirm whether callers pass an already-encoded value.
    String url = "http://dict.youdao.com/fsearch?client=deskdict&keyfrom=chrome.extension&q=" + word + "&pos=-1&doctype=xml&vendor=unknown&appVer=3.1.17.4208&le=eng";
    this.request = (HttpServletRequest) this.context.getAttribute(HTTP_REQUEST);
    HttpSession session = request.getSession();

    Object signedIn = session.getAttribute("usr");
    if (signedIn == null) {
        // No user in the session: nothing to record, just relay the response.
        URLResourceLoader loader = new URLResourceLoader(new URL(url));
        return loader.getContent().toString();
    }
    this.user = (User) signedIn;

    Document doc = new Document();
    doc.load(new URL(url));
    Element document = doc.getRoot();
    if (document.getChildNodes().size() < 3) {
        throw new ApplicationException("The dictionary resource is temporarily unavailable");
    }

    vocabulary entry = new vocabulary();
    entry.setUserId(this.user.getId());
    entry.setDate(LocalDateTime.now());
    // getParameter returns null when the parameter is absent; only record a link that exists.
    String referer = this.request.getParameter("referer");
    if (referer != null) {
        entry.setReferenceLink(referer);
    }

    List<Element> phrase = document.getElementsByTagName("return-phrase");
    if (phrase.size() > 0) {
        entry.setWord(phrase.get(0).getData());
    }
    List<Element> phoneticSymbol = document.getElementsByTagName("phonetic-symbol");
    if (phoneticSymbol.size() > 0) {
        entry.setPhoneticSymbol(phoneticSymbol.get(0).getData());
    }
    List<Element> customTranslation = document.getElementsByTagName("custom-translation");
    if (customTranslation.size() > 0) {
        // Join the content of every translation with CRLF.
        StringBuilder interpretation = new StringBuilder();
        for (Element translation : customTranslation.get(0).getElementsByTagName("translation")) {
            List<Element> contents = translation.getElementsByTagName("content");
            if (contents.isEmpty()) {
                // A translation without a content child has nothing to contribute.
                continue;
            }
            if (interpretation.length() > 0) {
                interpretation.append("\r\n");
            }
            interpretation.append(contents.get(0).getData());
        }
        entry.setInterpretation(interpretation.toString());
    }

    Table words = entry.findWith("WHERE word=? and user_id=?", new Object[] { entry.getWord(), this.user.getId() });
    if (words.isEmpty()) {
        entry.append();
    } else {
        // Load the existing row, then refresh only its date.
        entry.setData(words.get(0));
        entry.setDate(LocalDateTime.now());
        entry.update();
    }
    return document.toString();
}
Also used : ApplicationException(org.tinystruct.ApplicationException) custom.objects.vocabulary(custom.objects.vocabulary) Table(org.tinystruct.data.component.Table) URLResourceLoader(org.tinystruct.system.util.URLResourceLoader) HttpSession(javax.servlet.http.HttpSession) Element(org.tinystruct.dom.Element) Document(org.tinystruct.dom.Document) URL(java.net.URL)

Example 3 with URLResourceLoader

Use of org.tinystruct.system.util.URLResourceLoader in project bible-online by m0ver.

Method getTranslate of class sender.

/**
 * Requests a translation of {@code words} from the Youdao translate endpoint
 * and returns the response body as loaded by {@code URLResourceLoader}.
 *
 * @param words the text to translate; it is placed in the query string as-is
 *        (NOTE(review): no URL-encoding happens here; confirm callers encode it)
 * @return the content loaded from the translate URL
 * @throws MalformedURLException if the request URL cannot be constructed
 * @throws ApplicationException declared for failures of the called loader
 */
public StringBuilder getTranslate(String words) throws MalformedURLException, ApplicationException {
    final URL endpoint = new URL("http://fanyi.youdao.com/translate?client=deskdict&keyfrom=chrome.extension&xmlVersion=1.1&dogVersion=1.0&ue=utf8&i=" + words + "&doctype=xml");
    return new URLResourceLoader(endpoint).getContent();
}
Also used : URLResourceLoader(org.tinystruct.system.util.URLResourceLoader) URL(java.net.URL)

Example 4 with URLResourceLoader

Use of org.tinystruct.system.util.URLResourceLoader in project bible-online by m0ver.

Method startJPV of class crawler.

/**
 * Downloads chapter pages from wordplanet.org, one page per book/chapter, and
 * appends each verse found through a {@code JPV} record.
 *
 * <p>The number of chapters per book is taken from the highest
 * {@code chapter_id} in table {@code zh_CN}. A chapter is skipped when the
 * existence query returns any row. Otherwise the page is cut down to the text
 * between the first verse span and the closing page comment, and every
 * regular-expression match is stored as the next verse, numbered from 1.
 *
 * <p>A chapter whose page does not contain the expected markers is skipped
 * with a message on {@code System.err} instead of aborting the whole run with
 * a {@link StringIndexOutOfBoundsException}.
 *
 * @throws MalformedURLException if a chapter URL cannot be constructed
 */
public void startJPV() throws MalformedURLException {
    final String endMarker = "<!--... sharper than any twoedged sword... -->";
    final Pattern versePattern = Pattern.compile("</span>\\s*(.*)(\r\n)*");
    try {
        JPV jpv = new JPV();
        bible bible = new bible();
        bible.setTableName("zh_CN");
        for (int bookId = 1; bookId <= 66; bookId++) {
            Row maxChapter = bible.findOne("SELECT max(chapter_id) as n FROM zh_CN WHERE book_id=?", new Object[] { bookId });
            int chapterCount = maxChapter.get(0).get("n").intValue();
            for (int chapterId = 1; chapterId <= chapterCount; chapterId++) {
                jpv.setBookId(bookId);
                jpv.setChapterId(chapterId);
                // NOTE(review): this existence check reads table KJV although rows are
                // appended through a JPV object - confirm that is the intended table.
                Table stored = jpv.find("SELECT * FROM KJV WHERE book_id = ? and chapter_id = ? order by id desc", new Object[] { jpv.getBookId(), jpv.getChapterId() });
                if (stored.size() > 0) {
                    // Chapter already has rows; nothing to import.
                    continue;
                }

                URL url = new URL("http://www.wordplanet.org/kj/" + (bookId < 10 ? "0" + bookId : bookId) + "/" + chapterId + ".htm");
                StringBuilder content = new URLResourceLoader(url).getContent();

                // The first verse span appears with or without a space after the number.
                String firstVerse = "<span class=\"verse\" id=\"1\">1 </span>";
                if (content.indexOf(firstVerse) == -1) {
                    firstVerse = "<span class=\"verse\" id=\"1\">1</span>";
                }
                // Both markers must be present and in order, otherwise substring() would throw.
                int textStart = content.indexOf(firstVerse);
                int textEnd = content.indexOf(endMarker);
                if (textStart < 0 || textEnd < textStart) {
                    System.err.println("Skipping book " + bookId + " chapter " + chapterId + ": expected page markers not found in " + url);
                    continue;
                }
                String words = content.substring(textStart, textEnd).replaceAll("<br/>", "\r\n");

                // No per-verse "already stored" check is needed here: this point is
                // only reached when the chapter has no stored rows at all.
                int partId = 1;
                Matcher verses = versePattern.matcher(words);
                while (verses.find()) {
                    jpv.setPartId(partId++);
                    // Drop the closing tag of the verse-number span that precedes the text.
                    String text = verses.group().replaceAll("</span>\\s*", "");
                    jpv.setContent(text);
                    jpv.append();
                    System.out.println(text);
                }
            }
        }
    } catch (ApplicationException e) {
        // Best-effort crawl: report the failure and stop; verses appended so far are kept.
        e.printStackTrace();
    }
}
Also used : Pattern(java.util.regex.Pattern) custom.objects.bible(custom.objects.bible) Table(org.tinystruct.data.component.Table) Matcher(java.util.regex.Matcher) URL(java.net.URL) JPV(custom.objects.JPV) ApplicationException(org.tinystruct.ApplicationException) URLResourceLoader(org.tinystruct.system.util.URLResourceLoader) Row(org.tinystruct.data.component.Row)

Aggregations

URL (java.net.URL)4 URLResourceLoader (org.tinystruct.system.util.URLResourceLoader)4 ApplicationException (org.tinystruct.ApplicationException)3 Table (org.tinystruct.data.component.Table)3 custom.objects.bible (custom.objects.bible)2 Matcher (java.util.regex.Matcher)2 Pattern (java.util.regex.Pattern)2 Row (org.tinystruct.data.component.Row)2 JPV (custom.objects.JPV)1 KJV (custom.objects.KJV)1 custom.objects.vocabulary (custom.objects.vocabulary)1 HttpSession (javax.servlet.http.HttpSession)1 Document (org.tinystruct.dom.Document)1 Element (org.tinystruct.dom.Element)1