Search in sources:

Example 26 with Perl5Compiler

use of org.apache.oro.text.regex.Perl5Compiler in project Lucee by lucee.

the class Perl5Util method getPattern.

/**
 * Returns the compiled form of {@code strPattern} for the given compiler options, reusing the
 * instance stored in {@code patterns} when one is already present.
 *
 * @param strPattern the Perl5 regular expression source to compile
 * @param type the option mask handed to {@code Perl5Compiler.compile(String, int)}
 * @return the cached pattern if present, otherwise a newly compiled (and now cached) pattern
 * @throws MalformedPatternException if the compiler rejects {@code strPattern}
 */
private static Pattern getPattern(String strPattern, int type) throws MalformedPatternException {
    // The mask goes first, followed by a separator: the decimal form of an int never contains
    // ':', so the key is unambiguous. Plain concatenation (strPattern + type) mapped both
    // ("a1", 0) and ("a", 10) to "a10" and could hand back a pattern compiled from another source.
    final String key = type + ":" + strPattern;
    Object cached = patterns.get(key);
    if (cached != null) {
        return (Pattern) cached;
    }
    Pattern pattern = new Perl5Compiler().compile(strPattern, type);
    patterns.put(key, pattern);
    return pattern;
}
Also used : Pattern(org.apache.oro.text.regex.Pattern) Perl5Compiler(org.apache.oro.text.regex.Perl5Compiler)

Example 27 with Perl5Compiler

use of org.apache.oro.text.regex.Perl5Compiler in project Lucee by lucee.

the class Perl5Util method _matches.

/**
 * Compiles {@code strPattern} with the default Perl5 option mask and tests {@code strInput}
 * against it using {@code Perl5Matcher.matches}.
 *
 * @param strPattern the Perl5 regular expression source
 * @param strInput the text to test
 * @return the result of {@code Perl5Matcher.matches} for the input and the compiled pattern
 * @throws MalformedPatternException if the compiler rejects {@code strPattern}
 */
private static boolean _matches(String strPattern, String strInput) throws MalformedPatternException {
    final Perl5Compiler compiler = new Perl5Compiler();
    final Pattern compiled = compiler.compile(strPattern, Perl5Compiler.DEFAULT_MASK);
    final PatternMatcherInput subject = new PatternMatcherInput(strInput);
    final Perl5Matcher matcher = new Perl5Matcher();
    return matcher.matches(subject, compiled);
}
Also used : Pattern(org.apache.oro.text.regex.Pattern) Perl5Compiler(org.apache.oro.text.regex.Perl5Compiler) PatternMatcherInput(org.apache.oro.text.regex.PatternMatcherInput) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher)

Example 28 with Perl5Compiler

use of org.apache.oro.text.regex.Perl5Compiler in project nutch by apache.

the class JSParseFilter method getJSLinks.

// Alternative pattern, which limits valid url characters.
// private static final String URI_PATTERN =
// "(^|\\s*?)[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2})+[/.](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2})+(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]*))?($|\\s*)";
/**
 * Extracts URLs from literals embedded in JavaScript and returns them as outlinks.
 *
 * <p>Every match of {@code STRING_PATTERN} in {@code plainText} contributes its second capture
 * group as a candidate. A candidate is kept only when it matches {@code URI_PATTERN} in full.
 * Candidates starting with {@code "www."} are prefixed with {@code "http://"}; all others are
 * resolved against {@code base}, and those that fail to resolve are skipped.
 *
 * @param plainText the text to scan
 * @param anchor the anchor text attached to every outlink that is produced
 * @param base the URL used to resolve candidates; if it cannot be parsed the error is logged
 *        and resolution proceeds with a {@code null} context URL
 * @return the collected outlinks, or an empty array when none were found
 */
private Outlink[] getJSLinks(String plainText, String anchor, String base) {
    final List<Outlink> collected = new ArrayList<Outlink>();

    URL baseURL = null;
    try {
        baseURL = new URL(base);
    } catch (Exception e) {
        if (LOG.isErrorEnabled()) {
            LOG.error("getJSLinks", e);
        }
    }

    try {
        // Both patterns are compiled with the same option mask.
        final int options = Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.MULTILINE_MASK;
        final PatternCompiler compiler = new Perl5Compiler();
        final Pattern literalPattern = compiler.compile(STRING_PATTERN, options);
        final Pattern uriPattern = compiler.compile(URI_PATTERN, options);
        final PatternMatcher literalMatcher = new Perl5Matcher();
        final PatternMatcher uriMatcher = new Perl5Matcher();
        final PatternMatcherInput text = new PatternMatcherInput(plainText);

        // Walk through every literal found in the text.
        while (literalMatcher.contains(text, literalPattern)) {
            final MatchResult literal = literalMatcher.getMatch();
            String url = literal.group(2);

            // Skip literals that are not a URI as a whole.
            if (!uriMatcher.matches(new PatternMatcherInput(url), uriPattern)) {
                continue;
            }

            if (url.startsWith("www.")) {
                url = "http://" + url;
            } else {
                try {
                    url = new URL(baseURL, url).toString();
                } catch (MalformedURLException ex) {
                    // Unresolvable candidate: drop it and move on to the next match.
                    if (LOG.isTraceEnabled()) {
                        LOG.trace(" - failed URL parse '" + url + "' and baseURL '" + baseURL + "'", ex);
                    }
                    continue;
                }
            }

            url = url.replaceAll("&amp;", "&");
            if (LOG.isTraceEnabled()) {
                LOG.trace(" - outlink from JS: '" + url + "'");
            }
            collected.add(new Outlink(url, anchor));
        }
    } catch (Exception ex) {
        // Any other failure ends the scan; outlinks gathered so far are still returned.
        if (LOG.isErrorEnabled()) {
            LOG.error("getJSLinks", ex);
        }
    }

    // An empty list yields a zero-length array.
    return collected.toArray(new Outlink[0]);
}
Also used : Outlink(org.apache.nutch.parse.Outlink) Perl5Compiler(org.apache.oro.text.regex.Perl5Compiler) Pattern(org.apache.oro.text.regex.Pattern) PatternCompiler(org.apache.oro.text.regex.PatternCompiler) MalformedURLException(java.net.MalformedURLException) ArrayList(java.util.ArrayList) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher) MatchResult(org.apache.oro.text.regex.MatchResult) URL(java.net.URL) PatternMatcherInput(org.apache.oro.text.regex.PatternMatcherInput) PatternMatcher(org.apache.oro.text.regex.PatternMatcher)

Example 29 with Perl5Compiler

use of org.apache.oro.text.regex.Perl5Compiler in project ofbiz-framework by apache.

the class PatternFactory method createOrGetPerl5CompiledPattern.

/**
 * Compiles and caches a Perl5 regexp pattern for the given string pattern and case mode.
 * This would be of no benefits (and may bloat memory usage) if stringPattern is never the same.
 * @param stringPattern a Perl5 pattern string
 * @param caseSensitive case sensitive true/false
 * @return a <code>Pattern</code> instance for the given string pattern, compiled with the
 *         requested case sensitivity
 * @throws MalformedPatternException if the compiler rejects <code>stringPattern</code>
 */
public static Pattern createOrGetPerl5CompiledPattern(String stringPattern, boolean caseSensitive) throws MalformedPatternException {
    // The compiled result depends on caseSensitive as well as on the pattern text, so both must
    // be part of the cache key. Keying on the text alone returned whichever variant was cached
    // first, e.g. a case-sensitive pattern for a later case-insensitive request.
    final String cacheKey = (caseSensitive ? "cs:" : "ci:") + stringPattern;
    Pattern pattern = compiledPerl5Patterns.get(cacheKey);
    if (pattern == null) {
        // READ_ONLY_MASK guarantees immutability
        int options = Perl5Compiler.READ_ONLY_MASK;
        if (!caseSensitive) {
            options |= Perl5Compiler.CASE_INSENSITIVE_MASK;
        }
        pattern = new Perl5Compiler().compile(stringPattern, options);
        // Keep the value returned by the cache rather than the locally compiled one.
        pattern = compiledPerl5Patterns.putIfAbsentAndGet(cacheKey, pattern);
        if (Debug.verboseOn()) {
            Debug.logVerbose("Compiled and cached the pattern: '" + stringPattern + "'", module);
        }
    }
    return pattern;
}
Also used : Pattern(org.apache.oro.text.regex.Pattern) Perl5Compiler(org.apache.oro.text.regex.Perl5Compiler)

Example 30 with Perl5Compiler

use of org.apache.oro.text.regex.Perl5Compiler in project evosuite by EvoSuite.

the class ExpressionExecutor method visit.

/**
 * Evaluates a string comparison node by evaluating both operands through this visitor and
 * applying the node's operator to the resulting strings.
 *
 * <p>For {@code PATTERNMATCHES} and {@code APACHE_ORO_PATTERN_MATCHES} the left operand is the
 * regular expression and the right operand is the text matched against it.
 *
 * @param n the comparison node to evaluate
 * @param arg unused
 * @return {@code TRUE_VALUE} or {@code FALSE_VALUE} for a handled operator; {@code null}
 *         (after logging a warning) for any other operator
 * @throws RuntimeException wrapping a {@code MalformedPatternException} when the left operand
 *         is not a valid ORO pattern
 */
@Override
public Object visit(StringBinaryComparison n, Void arg) {
    final String left = (String) n.getLeftOperand().accept(this, null);
    final String right = (String) n.getRightOperand().accept(this, null);
    final Operator op = n.getOperator();

    final boolean outcome;
    switch(op) {
        case EQUALSIGNORECASE:
            outcome = left.equalsIgnoreCase(right);
            break;
        case EQUALS:
            outcome = left.equals(right);
            break;
        case ENDSWITH:
            outcome = left.endsWith(right);
            break;
        case CONTAINS:
            outcome = left.contains(right);
            break;
        case PATTERNMATCHES:
            // java.util.regex: left operand is the regex, right operand the subject.
            outcome = right.matches(left);
            break;
        case APACHE_ORO_PATTERN_MATCHES:
            {
                final Perl5Matcher oroMatcher = new Perl5Matcher();
                final Perl5Compiler oroCompiler = new Perl5Compiler();
                final Pattern compiled;
                try {
                    compiled = oroCompiler.compile(left);
                } catch (MalformedPatternException e) {
                    throw new RuntimeException(e);
                }
                outcome = oroMatcher.matches(right, compiled);
                break;
            }
        default:
            log.warn("StringComparison: unimplemented operator!" + op);
            return null;
    }
    return outcome ? TRUE_VALUE : FALSE_VALUE;
}
Also used : Perl5Compiler(org.apache.oro.text.regex.Perl5Compiler) Pattern(org.apache.oro.text.regex.Pattern) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher) MalformedPatternException(org.apache.oro.text.regex.MalformedPatternException)

Aggregations

Perl5Compiler (org.apache.oro.text.regex.Perl5Compiler)30 Pattern (org.apache.oro.text.regex.Pattern)26 MalformedPatternException (org.apache.oro.text.regex.MalformedPatternException)23 Perl5Matcher (org.apache.oro.text.regex.Perl5Matcher)23 PatternCompiler (org.apache.oro.text.regex.PatternCompiler)10 PatternMatcher (org.apache.oro.text.regex.PatternMatcher)7 MatchResult (org.apache.oro.text.regex.MatchResult)6 PatternMatcherInput (org.apache.oro.text.regex.PatternMatcherInput)5 MalformedURLException (java.net.MalformedURLException)4 ArrayList (java.util.ArrayList)4 IOException (java.io.IOException)3 Perl5Substitution (org.apache.oro.text.regex.Perl5Substitution)3 ITableEntry (org.talend.designer.abstractmap.model.tableentry.ITableEntry)3 BufferedReader (java.io.BufferedReader)2 EOFException (java.io.EOFException)2 File (java.io.File)2 FileInputStream (java.io.FileInputStream)2 FileNotFoundException (java.io.FileNotFoundException)2 InputStreamReader (java.io.InputStreamReader)2 Charset (java.nio.charset.Charset)2