Search in sources :

Example 16 with PatternMatcher

use of org.apache.oro.text.regex.PatternMatcher in project tdi-studio-se by Talend.

the class NodeQueryCheckUtil method compareNodeTableColumnsWithFunc.

/**
 * Checks whether the number of comma-separated entries in {@code columns} equals the number of
 * columns in the node's first metadata table, tolerating function calls whose argument lists
 * contain commas.
 * <p>
 * The check is lenient when there is nothing to compare against: it returns {@code true} if the
 * node has no metadata tables, if the first table is {@code null}, or if that table has no column
 * list.
 * <p>
 * When the plain count does not match, the whole column string is matched against
 * {@code SQL_FUNC_REGX}. If capture group 4 of that match contains more than one comma-separated
 * part, every occurrence of that text is replaced by a single placeholder and the comparison is
 * repeated recursively on the shortened string.
 *
 * @param node the node whose first metadata table supplies the expected column count
 * @param columns the comma-separated column list to check
 * @return {@code true} if the counts match (or there is no metadata to compare with),
 *         {@code false} otherwise, including when {@code SQL_FUNC_REGX} fails to compile
 */
private static boolean compareNodeTableColumnsWithFunc(Node node, String columns) {
    if (node.getMetadataList().size() == 0) {
        return true;
    }
    IMetadataTable metaTable = node.getMetadataList().get(0);
    if (metaTable == null || metaTable.getListColumns() == null) {
        return true;
    }
    int originColumnSize = metaTable.getListColumns().size();
    // modified by wzhang. replace the field to one String if it contains function
    String collapsedColumns = columns.replaceAll(FUNC_SPLIT, "column"); //$NON-NLS-1$
    String[] columnArray = collapsedColumns.split(","); //$NON-NLS-1$
    if (columnArray.length == originColumnSize) {
        return true;
    }
    // The plain count does not match, so try to match the columns with a function.
    try {
        PatternCompiler pc = new Perl5Compiler();
        org.apache.oro.text.regex.Pattern pattern = pc.compile(SQL_FUNC_REGX, REGX_FLAG);
        PatternMatcher columnMatcher = new Perl5Matcher();
        // The match runs against the untouched input, not the FUNC_SPLIT-collapsed copy.
        if (!columnMatcher.matches(columns, pattern)) {
            return false;
        }
        // group(4) is null when that group did not take part in the match, so test for null
        // BEFORE trimming; the original trimmed first and would throw NullPointerException.
        String rawColumnWithFunc = columnMatcher.getMatch().group(4);
        if (rawColumnWithFunc == null) {
            return false;
        }
        String columnWithFunc = rawColumnWithFunc.trim();
        String[] columnWithFuncArray = columnWithFunc.split(","); //$NON-NLS-1$
        if (columnWithFuncArray.length > 1) {
            // The replaced text contains a comma and the placeholder does not, so each
            // recursion strictly reduces the number of commas and the recursion terminates.
            String reducedColumns = columns.replace(columnWithFunc, "columnWithFunction"); //$NON-NLS-1$
            return compareNodeTableColumnsWithFunc(node, reducedColumns);
        }
    } catch (MalformedPatternException e) {
        // An invalid SQL_FUNC_REGX means the function form cannot be recognised; report a mismatch.
        return false;
    }
    return false;
}
Also used : Perl5Compiler(org.apache.oro.text.regex.Perl5Compiler) PatternCompiler(org.apache.oro.text.regex.PatternCompiler) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher) IMetadataTable(org.talend.core.model.metadata.IMetadataTable) MalformedPatternException(org.apache.oro.text.regex.MalformedPatternException) PatternMatcher(org.apache.oro.text.regex.PatternMatcher)

Example 17 with PatternMatcher

use of org.apache.oro.text.regex.PatternMatcher in project nutch by apache.

the class JSParseFilter method getJSLinks.

// Alternative pattern, which limits valid url characters.
// private static final String URI_PATTERN =
// "(^|\\s*?)[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2})+[/.](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2})+(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]*))?($|\\s*)";
/**
 * Extracts outlinks from literals embedded in JavaScript text.
 * <p>
 * Every match of {@code STRING_PATTERN} in {@code plainText} yields a candidate (capture group 2).
 * Candidates that do not fully match {@code URI_PATTERN} are skipped. A candidate starting with
 * {@code "www."} is prefixed with {@code "http://"}; any other candidate is resolved against
 * {@code base}, and is skipped if that resolution fails. Finally {@code "&amp;"} is replaced by
 * {@code "&"}.
 *
 * @param plainText the JavaScript text to scan
 * @param anchor the anchor text attached to every outlink produced
 * @param base the base URL used to resolve candidates; if it cannot be parsed the error is logged
 *        and resolution proceeds with a {@code null} context
 * @return the outlinks found, or an empty array if there are none; never {@code null}
 */
private Outlink[] getJSLinks(String plainText, String anchor, String base) {
    final List<Outlink> found = new ArrayList<Outlink>();

    // A bad base URL is logged but not fatal: contextUrl simply stays null.
    URL contextUrl = null;
    try {
        contextUrl = new URL(base);
    } catch (Exception e) {
        if (LOG.isErrorEnabled()) {
            LOG.error("getJSLinks", e);
        }
    }

    try {
        final int compileFlags = Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK
                | Perl5Compiler.MULTILINE_MASK;
        final PatternCompiler compiler = new Perl5Compiler();
        final Pattern literalPattern = compiler.compile(STRING_PATTERN, compileFlags);
        final Pattern uriPattern = compiler.compile(URI_PATTERN, compileFlags);
        final PatternMatcher literalMatcher = new Perl5Matcher();
        final PatternMatcher uriMatcher = new Perl5Matcher();
        final PatternMatcherInput source = new PatternMatcherInput(plainText);

        // Walk over every literal found in the text.
        while (literalMatcher.contains(source, literalPattern)) {
            final MatchResult literal = literalMatcher.getMatch();
            final String candidate = literal.group(2);

            // Skip literals that do not fully match the URI pattern.
            final PatternMatcherInput candidateInput = new PatternMatcherInput(candidate);
            if (!uriMatcher.matches(candidateInput, uriPattern)) {
                continue;
            }

            String resolved;
            if (candidate.startsWith("www.")) {
                resolved = "http://" + candidate;
            } else {
                // Resolve against the base; on failure move on to the next match.
                try {
                    resolved = new URL(contextUrl, candidate).toString();
                } catch (MalformedURLException ex) {
                    if (LOG.isTraceEnabled()) {
                        LOG.trace(" - failed URL parse '" + candidate + "' and baseURL '" + contextUrl + "'", ex);
                    }
                    continue;
                }
            }

            resolved = resolved.replaceAll("&amp;", "&");
            if (LOG.isTraceEnabled()) {
                LOG.trace(" - outlink from JS: '" + resolved + "'");
            }
            found.add(new Outlink(resolved, anchor));
        }
    } catch (Exception ex) {
        // Any failure ends the scan; outlinks collected so far are still returned.
        if (LOG.isErrorEnabled()) {
            LOG.error("getJSLinks", ex);
        }
    }

    // An empty list yields the zero-length array itself, so this always returns a non-null array.
    return found.toArray(new Outlink[0]);
}
Also used : Outlink(org.apache.nutch.parse.Outlink) Perl5Compiler(org.apache.oro.text.regex.Perl5Compiler) Pattern(org.apache.oro.text.regex.Pattern) PatternCompiler(org.apache.oro.text.regex.PatternCompiler) MalformedURLException(java.net.MalformedURLException) ArrayList(java.util.ArrayList) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher) MatchResult(org.apache.oro.text.regex.MatchResult) URL(java.net.URL) MalformedURLException(java.net.MalformedURLException) PatternMatcherInput(org.apache.oro.text.regex.PatternMatcherInput) PatternMatcher(org.apache.oro.text.regex.PatternMatcher)

Aggregations

PatternMatcher (org.apache.oro.text.regex.PatternMatcher)17 Perl5Matcher (org.apache.oro.text.regex.Perl5Matcher)11 Pattern (org.apache.oro.text.regex.Pattern)9 ArrayList (java.util.ArrayList)7 PatternCompiler (org.apache.oro.text.regex.PatternCompiler)7 Perl5Compiler (org.apache.oro.text.regex.Perl5Compiler)7 MalformedPatternException (org.apache.oro.text.regex.MalformedPatternException)5 MatchResult (org.apache.oro.text.regex.MatchResult)5 PatternMatcherInput (org.apache.oro.text.regex.PatternMatcherInput)5 MalformedURLException (java.net.MalformedURLException)2 Map (java.util.Map)2 Perl5Substitution (org.apache.oro.text.regex.Perl5Substitution)2 BaseOtterTest (com.alibaba.otter.shared.common.BaseOtterTest)1 ConfigException (com.alibaba.otter.shared.common.model.config.ConfigException)1 ModeValue (com.alibaba.otter.shared.common.model.config.data.DataMedia.ModeValue)1 URL (java.net.URL)1 LinkedList (java.util.LinkedList)1 CompoundVariable (org.apache.jmeter.engine.util.CompoundVariable)1 ApdexSummaryConsumer (org.apache.jmeter.report.processor.ApdexSummaryConsumer)1 ApdexThresholdsInfo (org.apache.jmeter.report.processor.ApdexThresholdsInfo)1