Search in sources :

Example 1 with PageAnalysis

use of org.wikipediacleaner.api.data.analysis.PageAnalysis in project wpcleaner by WPCleaner.

the class AlgorithmError method analyzeError.

/**
 * Runs one check algorithm against a page analysis and wraps the outcome
 * in an error page.
 *
 * When the page analysis already holds a result for the algorithm's error
 * number, the errors are read from that result; otherwise the algorithm is
 * executed and its outcome is stored back into the page analysis.
 *
 * @param algorithm Algorithm.
 * @param pageAnalysis Page analysis.
 * @return Error page, or {@code null} if either argument is {@code null}.
 */
public static CheckErrorPage analyzeError(CheckErrorAlgorithm algorithm, PageAnalysis pageAnalysis) {
    if (algorithm == null || pageAnalysis == null) {
        return null;
    }

    // Timing is only measured when tracing is switched on
    final Performance timer = traceTime ? Performance.getInstance("CheckError.analyzeError") : null;

    final CheckErrorPage analyzedPage = new CheckErrorPage(pageAnalysis.getPage(), algorithm);
    final List<CheckErrorResult> collected = new ArrayList<>();
    final int number = algorithm.getErrorNumber();
    final PageAnalysis.Result stored = pageAnalysis.getCheckWikiErrors(number);

    final boolean found;
    if (stored == null) {
        // No result stored yet for this error number: run the algorithm and store its outcome
        found = algorithm.analyze(pageAnalysis, collected, false);
        pageAnalysis.setCheckWikiErrors(number, found, collected);
    } else {
        found = stored.getErrors(collected);
    }
    analyzedPage.setResults(found, collected);

    if (timer != null) {
        timer.printStep("Error n°" + algorithm.getErrorNumber());
        timer.release();
    }
    return analyzedPage;
}
Also used : CheckErrorResult(org.wikipediacleaner.api.check.CheckErrorResult) ArrayList(java.util.ArrayList) PageAnalysis(org.wikipediacleaner.api.data.analysis.PageAnalysis) CheckErrorPage(org.wikipediacleaner.api.check.CheckErrorPage) Performance(org.wikipediacleaner.utils.Performance)

Example 2 with PageAnalysis

use of org.wikipediacleaner.api.data.analysis.PageAnalysis in project wpcleaner by WPCleaner.

the class AlgorithmError method computeErrorsFixed.

/**
 * Determines which of the initial errors are fixed, completely or in part,
 * in the current contents.
 *
 * Each initial error is re-analyzed against the current contents. An entry is
 * reported when the error is no longer found, or when it has fewer active
 * results than it had initially.
 *
 * @param initialErrors List of initial errors.
 * @param contents Current contents.
 * @param shouldCheckSpelling True if spelling should be checked.
 * @return Information about errors fixed.
 */
public static List<Progress> computeErrorsFixed(List<CheckErrorPage> initialErrors, String contents, boolean shouldCheckSpelling) {
    final List<Progress> fixed = new ArrayList<>();
    if (initialErrors == null) {
        return fixed;
    }

    PageAnalysis currentAnalysis = null;
    for (CheckErrorPage before : initialErrors) {
        // The analysis is created once, from the page of the first initial error
        if (currentAnalysis == null) {
            currentAnalysis = before.getPage().getAnalysis(contents, true);
            currentAnalysis.shouldCheckSpelling(shouldCheckSpelling);
        }

        CheckErrorPage after = analyzeError(before.getAlgorithm(), currentAnalysis);
        boolean errorGone = !after.getErrorFound();
        if (errorGone || (after.getActiveResultsCount() < before.getActiveResultsCount())) {
            fixed.add(new Progress(before.getAlgorithm(), errorGone));
        }
    }
    return fixed;
}
Also used : ArrayList(java.util.ArrayList) PageAnalysis(org.wikipediacleaner.api.data.analysis.PageAnalysis) CheckErrorPage(org.wikipediacleaner.api.check.CheckErrorPage)

Example 3 with PageAnalysis

use of org.wikipediacleaner.api.data.analysis.PageAnalysis in project wpcleaner by WPCleaner.

the class MainWindow method actionLoadList.

/**
 * Action called when Load List button is pressed.
 *
 * Asks the user for a text file and for its format, reads page names from the
 * file (decoded as UTF-8) and starts a {@code PageListWorker} on the resulting
 * list. Nothing happens if no wiki is selected, if the user cancels one of the
 * dialogs, if the chosen file is not a readable regular file, or if reading
 * the file fails.
 */
public void actionLoadList() {
    EnumWikipedia wikipedia = getWikipedia();
    if (wikipedia == null) {
        return;
    }
    // Ask which file should be loaded
    JFileChooser chooser = new JFileChooser();
    FileNameExtensionFilter filter = new FileNameExtensionFilter("Text files", "txt");
    chooser.setFileFilter(filter);
    int returnVal = chooser.showOpenDialog(this.getParentComponent());
    if (returnVal != JFileChooser.APPROVE_OPTION) {
        return;
    }
    File chosenFile = chooser.getSelectedFile();
    if ((chosenFile == null) || !chosenFile.isFile() || !chosenFile.canRead()) {
        return;
    }
    // Ask in which format the file is
    String[] values = { GT._T("Unformatted list of page names"), GT._T("Internal links in a formatted list") };
    String message = GT._T("The file must be encoded in UTF-8 to be read correctly.") + "\n" + GT._T("In which format is the file?");
    String value = Utilities.askForValue(getParentComponent(), message, values, true, values[0], (StringChecker) null);
    if (value == null) {
        return;
    }
    // Index of the chosen format in values; stays 0 (unformatted) when nothing matches
    int choice = 0;
    for (int i = 0; i < values.length; i++) {
        if (value.equals(values[i])) {
            choice = i;
        }
    }
    // Read file: try-with-resources closes the stream and the reader on every path
    List<String> pages = new ArrayList<>();
    try (FileInputStream input = new FileInputStream(chosenFile);
         BufferedReader reader = new BufferedReader(
             new InputStreamReader(input, java.nio.charset.StandardCharsets.UTF_8))) {
        String line;
        switch (choice) {
            case 0: {
                // Unformatted list: every non-blank line is a page name (added untrimmed)
                while ((line = reader.readLine()) != null) {
                    if (!line.trim().isEmpty()) {
                        pages.add(line);
                    }
                }
                break;
            }
            case 1: {
                // Formatted list with internal links: analyze the whole file as
                // wiki text and collect each distinct link target once
                StringBuilder buffer = new StringBuilder();
                while ((line = reader.readLine()) != null) {
                    if (buffer.length() > 0) {
                        buffer.append('\n');
                    }
                    buffer.append(line);
                }
                Page tmpPage = DataManager.getPage(getWiki(), chosenFile.getName(), null, null, null);
                String contents = buffer.toString();
                tmpPage.setContents(contents);
                PageAnalysis analysis = tmpPage.getAnalysis(contents, false);
                List<PageElementInternalLink> links = analysis.getInternalLinks();
                for (PageElementInternalLink link : links) {
                    String target = link.getLink();
                    // A leading colon is not part of the page name
                    if (target.startsWith(":")) {
                        target = target.substring(1);
                    }
                    if (!pages.contains(target)) {
                        pages.add(target);
                    }
                }
                break;
            }
            default:
                break;
        }
        new PageListWorker(wikipedia, this, null, pages, PageListWorker.Mode.DIRECT, true, GT._T("List")).start();
    } catch (IOException ignored) {
        // Best effort, as before: a file that cannot be read loads no list.
        // NOTE(review): the user gets no feedback here - consider reporting the failure.
    }
}
Also used : PageElementInternalLink(org.wikipediacleaner.api.data.PageElementInternalLink) PageListWorker(org.wikipediacleaner.gui.swing.pagelist.PageListWorker) InputStreamReader(java.io.InputStreamReader) ArrayList(java.util.ArrayList) PageAnalysis(org.wikipediacleaner.api.data.analysis.PageAnalysis) EnumQueryPage(org.wikipediacleaner.api.constants.EnumQueryPage) Page(org.wikipediacleaner.api.data.Page) ConfigurationValueString(org.wikipediacleaner.utils.ConfigurationValueString) IOException(java.io.IOException) FileNameExtensionFilter(javax.swing.filechooser.FileNameExtensionFilter) FileInputStream(java.io.FileInputStream) APIException(org.wikipediacleaner.api.APIException) IOException(java.io.IOException) JFileChooser(javax.swing.JFileChooser) BufferedReader(java.io.BufferedReader) EnumWikipedia(org.wikipediacleaner.api.constants.EnumWikipedia) File(java.io.File)

Example 4 with PageAnalysis

use of org.wikipediacleaner.api.data.analysis.PageAnalysis in project wpcleaner by WPCleaner.

the class OnePageWindow method initializeInitialErrors.

/**
 * Builds the list of initial errors by running the given algorithms on the
 * current contents of the page. Does nothing when there is no page.
 *
 * @param algorithms Algorithms.
 */
protected void initializeInitialErrors(Collection<CheckErrorAlgorithm> algorithms) {
    if (page == null) {
        return;
    }

    PageAnalysis currentAnalysis = page.getAnalysis(page.getContents(), false);
    currentAnalysis.shouldCheckSpelling(shouldCheckSpelling());
    List<CheckErrorPage> detected = AlgorithmError.analyzeErrors(algorithms, currentAnalysis, false);

    // Always start from a fresh list, even when the analysis returned nothing
    initialErrors = new ArrayList<>();
    if (detected != null) {
        initialErrors.addAll(detected);
    }
}
Also used : PageAnalysis(org.wikipediacleaner.api.data.analysis.PageAnalysis) CheckErrorPage(org.wikipediacleaner.api.check.CheckErrorPage)

Example 5 with PageAnalysis

use of org.wikipediacleaner.api.data.analysis.PageAnalysis in project wpcleaner by WPCleaner.

the class ActionInsertPredefinedText method actionAddTemplate.

/**
 * Action called when a template is selected to be added.
 *
 * The template is inserted into the text of the pane unless the page already
 * has it. The insertion point is, in order of preference: before the first
 * existing template, before the first category, before the first language
 * link, or otherwise at the first line break (the end of the text when no line
 * break is found after position 0). The listener, if any, is then notified.
 *
 * @param templateName Template name.
 */
public void actionAddTemplate(String templateName) {
    if (templateName == null || pageProvider == null) {
        return;
    }
    Page currentPage = pageProvider.getPage();
    if (currentPage == null) {
        return;
    }
    String text = pane.getText();
    PageAnalysis textAnalysis = currentPage.getAnalysis(text, false);

    // Nothing to do when the template is already applied
    if (textAnalysis.hasTemplate(templateName) != null) {
        return;
    }

    // Work out the insertion offset and the line breaks to put around the template
    int insertAt = text.length();
    int breaksBefore = 0;
    int breaksAfter = 2;
    List<PageElementTemplate> existingTemplates = textAnalysis.getTemplates();
    if ((existingTemplates == null) || existingTemplates.isEmpty()) {
        List<PageElementCategory> existingCategories = textAnalysis.getCategories();
        if ((existingCategories == null) || existingCategories.isEmpty()) {
            List<PageElementLanguageLink> existingLangLinks = textAnalysis.getLanguageLinks();
            if ((existingLangLinks == null) || existingLangLinks.isEmpty()) {
                // No anchor element at all: insert at the first line break, if there is one
                int firstBreak = text.indexOf('\n');
                if (firstBreak > 0) {
                    insertAt = firstBreak;
                }
                breaksBefore = 2;
                breaksAfter = 0;
            } else {
                insertAt = existingLangLinks.get(0).getBeginIndex();
            }
        } else {
            insertAt = existingCategories.get(0).getBeginIndex();
        }
    } else {
        insertAt = existingTemplates.get(0).getBeginIndex();
        breaksAfter = 1;
        // Line breaks are added before the template only when the first
        // line break of the text comes after the first existing template
        int firstBreak = text.indexOf('\n');
        if ((firstBreak > 0) && (firstBreak > insertAt)) {
            breaksBefore = 2;
        }
    }

    // Assemble the new text: head, line breaks, template, line breaks, tail
    StringBuilder updated = new StringBuilder();
    if (insertAt > 0) {
        updated.append(text.substring(0, insertAt));
    }
    int remaining = breaksBefore;
    while (remaining > 0) {
        updated.append("\n");
        remaining--;
    }
    updated.append(TemplateBuilder.from(templateName).toString());
    remaining = breaksAfter;
    while (remaining > 0) {
        updated.append("\n");
        remaining--;
    }
    if (insertAt < text.length()) {
        updated.append(text.substring(insertAt));
    }
    pane.changeText(updated.toString());

    if (listener != null) {
        listener.templateInserted(templateName);
    }
}
Also used : PageElementTemplate(org.wikipediacleaner.api.data.PageElementTemplate) PageElementLanguageLink(org.wikipediacleaner.api.data.PageElementLanguageLink) PageElementCategory(org.wikipediacleaner.api.data.PageElementCategory) PageAnalysis(org.wikipediacleaner.api.data.analysis.PageAnalysis) Page(org.wikipediacleaner.api.data.Page) ConfigurationValueString(org.wikipediacleaner.utils.ConfigurationValueString)

Aggregations

PageAnalysis (org.wikipediacleaner.api.data.analysis.PageAnalysis): 61; WPCConfigurationString (org.wikipediacleaner.api.configuration.WPCConfigurationString): 17; ConfigurationValueString (org.wikipediacleaner.utils.ConfigurationValueString): 16; PageElementTemplate (org.wikipediacleaner.api.data.PageElementTemplate): 14; ArrayList (java.util.ArrayList): 13; Page (org.wikipediacleaner.api.data.Page): 13; CheckErrorPage (org.wikipediacleaner.api.check.CheckErrorPage): 12; APIException (org.wikipediacleaner.api.APIException): 10; WPCConfiguration (org.wikipediacleaner.api.configuration.WPCConfiguration): 8; PageElementInternalLink (org.wikipediacleaner.api.data.PageElementInternalLink): 8; HashMap (java.util.HashMap): 7; API (org.wikipediacleaner.api.API): 7; CheckErrorResult (org.wikipediacleaner.api.check.CheckErrorResult): 7; CheckErrorAlgorithm (org.wikipediacleaner.api.check.algorithm.CheckErrorAlgorithm): 6; PageElementCategory (org.wikipediacleaner.api.data.PageElementCategory): 5; AlgorithmError (org.wikipediacleaner.api.algorithm.AlgorithmError): 4; EnumWikipedia (org.wikipediacleaner.api.constants.EnumWikipedia): 4; ContentsComment (org.wikipediacleaner.api.data.contents.comment.ContentsComment): 4; MediaWiki (org.wikipediacleaner.api.MediaWiki): 3; Namespace (org.wikipediacleaner.api.data.Namespace): 3