
Example 21 with APIException

use of org.wikipediacleaner.api.APIException in project wpcleaner by WPCleaner.

the class ApiXmlRevisionsResult method executeLastRevision.

/**
 * Execute last revision request.
 *
 * @param properties Properties defining request.
 * @param pages Pages to be filled with last revision content.
 * @return True if request should be continued.
 * @throws APIException Exception thrown by the API.
 */
@Override
public boolean executeLastRevision(Map<String, String> properties, Collection<Page> pages) throws APIException {
    try {
        Element root = getRoot(properties, ApiRequest.MAX_ATTEMPTS);
        // Manage redirects and missing pages
        updateRedirect(root, pages);
        // Retrieve normalization information
        Map<String, String> normalization = new HashMap<>();
        retrieveNormalization(root, normalization);
        // Retrieve current timestamp
        Optional<String> currentTimestamp = getCurrentTimestamp(root);
        // Retrieve pages
        XPathExpression<Element> xpa = XPathFactory.instance().compile("/api/query/pages/page", Filters.element());
        List<Element> results = xpa.evaluate(root);
        Iterator<Element> iter = results.iterator();
        while (iter.hasNext()) {
            Element pageNode = iter.next();
            String title = pageNode.getAttributeValue("title");
            Integer pageId = null;
            try {
                String pageIdValue = pageNode.getAttributeValue("pageid");
                if (pageIdValue != null) {
                    pageId = Integer.valueOf(pageIdValue);
                }
            } catch (NumberFormatException e) {
                log.warn("Incorrect page id for page " + title, e);
            }
            String namespace = pageNode.getAttributeValue("ns");
            for (Page tmpPage : pages) {
                Iterator<Page> itPage = tmpPage.getRedirects().getIteratorWithPage();
                while (itPage.hasNext()) {
                    Page page = itPage.next();
                    boolean samePage = false;
                    if ((pageId != null) && (page.getPageId() != null)) {
                        samePage = pageId.equals(page.getPageId());
                    } else {
                        samePage = Page.areSameTitle(page.getTitle(), title);
                        if (!samePage) {
                            String normalizedTitle = getNormalizedTitle(page.getTitle(), normalization);
                            samePage = Page.areSameTitle(normalizedTitle, title);
                        }
                    }
                    if (samePage) {
                        page.setNamespace(namespace);
                        currentTimestamp.ifPresent(page::setStartTimestamp);
                        updatePageInformation(pageNode, page);
                        // Retrieve revisions
                        if (!Boolean.FALSE.equals(page.isExisting())) {
                            XPathExpression<Element> xpaRevisions = XPathFactory.instance().compile("revisions/rev", Filters.element());
                            Element revNode = xpaRevisions.evaluateFirst(pageNode);
                            if (revNode != null) {
                                XPathExpression<Element> xpaSlots = XPathFactory.instance().compile("slots/slot", Filters.element());
                                Element slotNode = xpaSlots.evaluateFirst(revNode);
                                if (slotNode != null) {
                                    page.setContents(slotNode.getText());
                                    page.setExisting(Boolean.TRUE);
                                    page.setRevisionId(revNode.getAttributeValue("revid"));
                                    page.setContentsTimestamp(revNode.getAttributeValue("timestamp"));
                                }
                            }
                        }
                    }
                }
            }
        }
        // Retrieve continue
        return shouldContinue(root, "/api/query-continue/revisions", properties);
    } catch (JDOMException e) {
        log.error("Error loading revisions", e);
        throw new APIException("Error parsing XML", e);
    }
}
Also used: HashMap (java.util.HashMap), Element (org.jdom2.Element), Page (org.wikipediacleaner.api.data.Page), JDOMException (org.jdom2.JDOMException), APIException (org.wikipediacleaner.api.APIException)
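
As a usage note: the boolean return value drives a continuation loop. shouldContinue merges any query-continue parameters back into properties, so a caller re-invokes the method until it returns false. A minimal sketch, assuming an already constructed ApiXmlRevisionsResult named result and caller-prepared request properties (neither appears in this excerpt):

Map<String, String> properties = new HashMap<>();
// action=query / prop=revisions parameters would be filled in here by the caller
Collection<Page> pages = new ArrayList<>(); // pages whose last revision is wanted
boolean more = true;
while (more) {
    // Each call merges query-continue parameters into `properties`,
    // so the next iteration resumes where the previous one stopped.
    more = result.executeLastRevision(properties, pages);
}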

Example 22 with APIException

use of org.wikipediacleaner.api.APIException in project wpcleaner by WPCleaner.

the class ApiXmlAbuseFiltersResult method executeAbuseFilters.

/**
 * Execute abuse filters request.
 *
 * @param properties Properties defining request.
 * @param list List to be filled with abuse filters.
 * @return True if request should be continued.
 * @throws APIException Exception thrown by the API.
 */
@Override
public boolean executeAbuseFilters(Map<String, String> properties, List<AbuseFilter> list) throws APIException {
    try {
        Element root = getRoot(properties, ApiRequest.MAX_ATTEMPTS);
        // Retrieve abuse filters
        XPathExpression<Element> xpa = XPathFactory.instance().compile("/api/query/abusefilters/filter", Filters.element());
        List<Element> results = xpa.evaluate(root);
        Iterator<Element> iter = results.iterator();
        while (iter.hasNext()) {
            Element currentNode = iter.next();
            Integer id = Integer.valueOf(0);
            try {
                String tmp = currentNode.getAttributeValue("id");
                if (tmp != null) {
                    id = Integer.parseInt(tmp);
                }
            } catch (NumberFormatException e) {
                // Ignore a malformed id attribute and keep the default of 0
            }
            String description = currentNode.getAttributeValue("description");
            AbuseFilter filter = new AbuseFilter(id, description);
            filter.setDeleted(currentNode.getAttribute("deleted") != null);
            filter.setEnabled(currentNode.getAttribute("enabled") != null);
            list.add(filter);
        }
        // Retrieve continue
        return shouldContinue(root, "/api/query-continue/abusefilters", properties);
    } catch (JDOMException e) {
        log.error("Error loading abuse filters list", e);
        throw new APIException("Error parsing XML", e);
    }
}
Also used: AbuseFilter (org.wikipediacleaner.api.data.AbuseFilter), APIException (org.wikipediacleaner.api.APIException), Element (org.jdom2.Element), JDOMException (org.jdom2.JDOMException)
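
A typical caller accumulates the filters across continuation calls before inspecting them. A hedged sketch; the result construction is not shown, and isEnabled()/isDeleted()/getDescription() are assumed to be the getter counterparts of the setters used above:

List<AbuseFilter> filters = new ArrayList<>();
while (result.executeAbuseFilters(properties, filters)) {
    // loop until no query-continue element remains
}
for (AbuseFilter filter : filters) {
    // getters assumed to mirror the setDeleted()/setEnabled() calls above
    if (filter.isEnabled() && !filter.isDeleted()) {
        System.out.println(filter.getDescription());
    }
}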

Example 23 with APIException

use of org.wikipediacleaner.api.APIException in project wpcleaner by WPCleaner.

the class ApiXmlBacklinksResult method executeBacklinks.

/**
 * Execute back links request.
 *
 * @param properties Properties defining request.
 * @param page Page whose back links are requested.
 * @param list List of pages to be filled with the back links.
 * @return True if request should be continued.
 * @throws APIException Exception thrown by the API.
 */
@Override
public boolean executeBacklinks(Map<String, String> properties, Page page, List<Page> list) throws APIException {
    try {
        Element root = getRoot(properties, ApiRequest.MAX_ATTEMPTS);
        // Retrieve back links
        XPathExpression<Element> xpa = XPathFactory.instance().compile("/api/query/backlinks/bl", Filters.element());
        List<Element> listBacklinks = xpa.evaluate(root);
        Iterator<Element> itBacklink = listBacklinks.iterator();
        XPathExpression<Element> xpaRedirLinks = XPathFactory.instance().compile("redirlinks/bl", Filters.element());
        while (itBacklink.hasNext()) {
            Element currentBacklink = itBacklink.next();
            Page link = DataManager.getPage(getWiki(), currentBacklink.getAttributeValue("title"), null, null, null);
            link.setNamespace(currentBacklink.getAttributeValue("ns"));
            link.setPageId(currentBacklink.getAttributeValue("pageid"));
            if (currentBacklink.getAttribute("redirect") != null) {
                // TODO: Check if fragment is available
                link.getRedirects().add(page, null);
            }
            if (!list.contains(link)) {
                list.add(link);
            }
            // Links through redirects
            List<Element> listRedirLinks = xpaRedirLinks.evaluate(currentBacklink);
            if (listRedirLinks != null) {
                List<Page> linkList = new ArrayList<>();
                Iterator<Element> itRedirLink = listRedirLinks.iterator();
                while (itRedirLink.hasNext()) {
                    Element currentRedirLink = itRedirLink.next();
                    Page link2 = DataManager.getPage(getWiki(), currentRedirLink.getAttributeValue("title"), null, null, null);
                    link2.setNamespace(currentRedirLink.getAttributeValue("ns"));
                    link2.setPageId(currentRedirLink.getAttributeValue("pageid"));
                    if (!list.contains(link2)) {
                        list.add(link2);
                    }
                    if (!linkList.contains(link2)) {
                        linkList.add(link2);
                    }
                }
                link.setRelatedPages(Page.RelatedPages.BACKLINKS, linkList);
            }
        }
        // Retrieve continue
        return shouldContinue(root, "/api/query-continue/backlinks", properties);
    } catch (JDOMException e) {
        log.error("Error loading back links", e);
        throw new APIException("Error parsing XML", e);
    }
}
Also used: APIException (org.wikipediacleaner.api.APIException), Element (org.jdom2.Element), ArrayList (java.util.ArrayList), Page (org.wikipediacleaner.api.data.Page), JDOMException (org.jdom2.JDOMException)
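
After the listing is drained, direct backlinks sit in the list, and each redirect also carries the pages that link through it via setRelatedPages. A sketch of consuming the result, assuming getRelatedPages() as the getter counterpart of the setter used above:

List<Page> backlinks = new ArrayList<>();
while (result.executeBacklinks(properties, page, backlinks)) {
    // continue until the backlinks listing is exhausted
}
for (Page link : backlinks) {
    // getRelatedPages() assumed as counterpart of setRelatedPages() above
    List<Page> throughRedirect = link.getRelatedPages(Page.RelatedPages.BACKLINKS);
    if (throughRedirect != null) {
        System.out.println(link.getTitle() + ": " + throughRedirect.size() + " links through this redirect");
    }
}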

Example 24 with APIException

use of org.wikipediacleaner.api.APIException in project wpcleaner by WPCleaner.

the class ApiXmlCategoryMembersResult method executeCategoryMembers.

/**
 * Execute category members request.
 *
 * @param properties Properties defining request.
 * @param list List to be filled with category members.
 * @param categories Map of categories to be analyzed with their depth.
 * @param depth Current depth of the analysis.
 * @return True if request should be continued.
 * @throws APIException Exception thrown by the API.
 */
@Override
public boolean executeCategoryMembers(Map<String, String> properties, List<Page> list, Map<Page, Integer> categories, int depth) throws APIException {
    try {
        Element root = getRoot(properties, ApiRequest.MAX_ATTEMPTS);
        // Retrieve category members
        XPathExpression<Element> xpa = XPathFactory.instance().compile("/api/query/categorymembers/cm", Filters.element());
        List<Element> results = xpa.evaluate(root);
        Iterator<Element> iter = results.iterator();
        while (iter.hasNext()) {
            Element currentNode = iter.next();
            Page page = DataManager.getPage(getWiki(), currentNode.getAttributeValue("title"), null, null, null);
            page.setNamespace(currentNode.getAttributeValue("ns"));
            page.setPageId(currentNode.getAttributeValue("pageid"));
            if ((page.getNamespace() != null) && (page.getNamespace().intValue() == Namespace.CATEGORY)) {
                categories.put(page, depth + 1);
            } else {
                if (!list.contains(page)) {
                    list.add(page);
                }
            }
        }
        // Retrieve continue
        return shouldContinue(root, "/api/query-continue/categorymembers", properties);
    } catch (JDOMException e) {
        log.error("Error loading category members list", e);
        throw new APIException("Error parsing XML", e);
    }
}
Also used: APIException (org.wikipediacleaner.api.APIException), Element (org.jdom2.Element), Page (org.wikipediacleaner.api.data.Page), JDOMException (org.jdom2.JDOMException)
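
Note that subcategories are not returned as members; they are queued in the categories map with an incremented depth, leaving the recursion policy to the caller. A sketch of a driver that walks the category tree up to a maximum depth (rootCategory, maxDepth, and the per-category request setup are assumptions, not taken from this excerpt):

List<Page> members = new ArrayList<>();
Map<Page, Integer> categories = new HashMap<>();
categories.put(rootCategory, Integer.valueOf(0));
int maxDepth = 2; // hypothetical recursion limit
while (!categories.isEmpty()) {
    Map.Entry<Page, Integer> entry = categories.entrySet().iterator().next();
    categories.remove(entry.getKey());
    if (entry.getValue().intValue() > maxDepth) {
        continue; // subcategory is too deep: skip it
    }
    // `properties` would be updated here with cmtitle set to entry.getKey().getTitle()
    while (result.executeCategoryMembers(properties, members, categories, entry.getValue().intValue())) {
        // keep listing the current category until no query-continue remains
    }
}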

Example 25 with APIException

use of org.wikipediacleaner.api.APIException in project wpcleaner by WPCleaner.

the class ApiXmlPagesWithPropResult method executePagesWithProp.

/**
 * Execute pages with property request.
 *
 * @param properties Properties defining request.
 * @param list List to be filled with the pages having the property.
 * @return True if request should be continued.
 * @throws APIException Exception thrown by the API.
 */
@Override
public boolean executePagesWithProp(Map<String, String> properties, List<Page> list) throws APIException {
    try {
        Element root = getRoot(properties, ApiRequest.MAX_ATTEMPTS);
        // Retrieve pages with the property
        XPathExpression<Element> xpa = XPathFactory.instance().compile("/api/query/pageswithprop/page", Filters.element());
        List<Element> results = xpa.evaluate(root);
        Iterator<Element> iter = results.iterator();
        while (iter.hasNext()) {
            Element currentNode = iter.next();
            Integer pageId = null;
            try {
                String tmp = currentNode.getAttributeValue("pageid");
                if (tmp != null) {
                    pageId = Integer.valueOf(tmp);
                }
            } catch (NumberFormatException e) {
                // Ignore a malformed pageid attribute and leave pageId null
            }
            Page page = DataManager.getPage(getWiki(), currentNode.getAttributeValue("title"), pageId, null, null);
            page.setNamespace(currentNode.getAttributeValue("ns"));
            list.add(page);
        }
        // Retrieve continue
        return shouldContinue(root, "/api/query-continue/pageswithprop", properties);
    } catch (JDOMException e) {
        log.error("Error loading protected titles list", e);
        throw new APIException("Error parsing XML", e);
    }
}
Also used: APIException (org.wikipediacleaner.api.APIException), Element (org.jdom2.Element), Page (org.wikipediacleaner.api.data.Page), JDOMException (org.jdom2.JDOMException)
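
For reference, the MediaWiki list=pageswithprop module selects pages by a property name passed as a request parameter. A sketch of preparing and draining the request; the parameter keys follow the public MediaWiki API conventions and are not taken from this excerpt:

Map<String, String> properties = new HashMap<>();
properties.put("action", "query");
properties.put("list", "pageswithprop");
properties.put("pwppropname", "disambiguation"); // page property to search for
List<Page> pages = new ArrayList<>();
while (result.executePagesWithProp(properties, pages)) {
    // keep requesting until no query-continue remains
}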

Aggregations

APIException (org.wikipediacleaner.api.APIException): 128
Page (org.wikipediacleaner.api.data.Page): 70
API (org.wikipediacleaner.api.API): 42
Element (org.jdom2.Element): 41
JDOMException (org.jdom2.JDOMException): 37
ArrayList (java.util.ArrayList): 36
EnumWikipedia (org.wikipediacleaner.api.constants.EnumWikipedia): 15
IOException (java.io.IOException): 12
WPCConfigurationString (org.wikipediacleaner.api.configuration.WPCConfigurationString): 12
Configuration (org.wikipediacleaner.utils.Configuration): 11
ConfigurationValueString (org.wikipediacleaner.utils.ConfigurationValueString): 11
WPCConfiguration (org.wikipediacleaner.api.configuration.WPCConfiguration): 10
MediaWiki (org.wikipediacleaner.api.MediaWiki): 9
PageAnalysis (org.wikipediacleaner.api.data.analysis.PageAnalysis): 9
HashMap (java.util.HashMap): 8
EnumQueryPage (org.wikipediacleaner.api.constants.EnumQueryPage): 7
BufferedInputStream (java.io.BufferedInputStream): 6
InputStream (java.io.InputStream): 6
GZIPInputStream (java.util.zip.GZIPInputStream): 6
Header (org.apache.commons.httpclient.Header): 6