Search in sources :

Example 16 with PatternMatcherInput

use of org.apache.oro.text.regex.PatternMatcherInput in project Lucee by lucee.

the class Perl5Util method match.

/**
 * Collects the text of every match of the Perl5 regular expression {@code strPattern} found in
 * {@code strInput}, in the order the matcher reports them.
 *
 * <p>The pattern is always compiled with the multi-line flag; the case-insensitive flag is added
 * when {@code caseSensitive} is {@code false}.
 *
 * @param strPattern the Perl5 regular expression to search for
 * @param strInput the text to search
 * @param offset 1-based position at which the search starts; values below 1 are treated as 1,
 *          and a position beyond the end of the input yields an empty result
 * @param caseSensitive {@code true} to match case-sensitively
 * @return an array with the matched text of each hit; empty when nothing matches
 * @throws MalformedPatternException propagated from {@code getPattern} (presumably when the
 *           pattern cannot be compiled)
 */
public static Array match(String strPattern, String strInput, int offset, boolean caseSensitive) throws MalformedPatternException {
    Perl5Matcher matcher = new Perl5Matcher();
    PatternMatcherInput input = new PatternMatcherInput(strInput);

    // The two masks are distinct bits, so OR-ing them is equivalent to adding them.
    int compileOptions = Perl5Compiler.MULTILINE_MASK;
    if (!caseSensitive) {
        compileOptions |= Perl5Compiler.CASE_INSENSITIVE_MASK;
    }
    Pattern pattern = getPattern(strPattern, compileOptions);
    Array rtn = new ArrayImpl();

    // Honor the caller's 1-based offset (previously it was normalized but then ignored).
    int startIndex = Math.max(offset, 1) - 1;
    if (startIndex > strInput.length()) {
        // Nothing left to search.
        return rtn;
    }
    if (startIndex > 0) {
        // Moving the current offset (rather than truncating the input) keeps the text before
        // the offset in the input the matcher sees.
        input.setCurrentOffset(startIndex);
    }

    while (matcher.contains(input, pattern)) {
        MatchResult result = matcher.getMatch();
        rtn.appendEL(result.toString());
    }
    return rtn;
}
Also used : Array(lucee.runtime.type.Array) Pattern(org.apache.oro.text.regex.Pattern) PatternMatcherInput(org.apache.oro.text.regex.PatternMatcherInput) ArrayImpl(lucee.runtime.type.ArrayImpl) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher) MatchResult(org.apache.oro.text.regex.MatchResult)

Example 17 with PatternMatcherInput

use of org.apache.oro.text.regex.PatternMatcherInput in project nutch by apache.

the class OutlinkExtractor method getOutlinks.

/**
 * Extracts <code>Outlink</code>s from the given plain text and attaches the
 * given anchor to each extracted <code>Outlink</code>.
 *
 * Matching stops early once 60 seconds of wall-clock time have elapsed since
 * the call started; the links collected so far are still returned. Any
 * exception raised while compiling or matching is logged and the links
 * collected up to that point are returned.
 *
 * @param plainText
 *          the plain text from which URLs should be extracted.
 * @param anchor
 *          the anchor of the url
 * @param conf
 *          configuration; not read by this method
 *
 * @return Array of <code>Outlink</code>s found in plainText; empty (never
 *         <code>null</code>) when none were found
 */
public static Outlink[] getOutlinks(final String plainText, String anchor, Configuration conf) {
    long start = System.currentTimeMillis();
    final List<Outlink> outlinks = new ArrayList<>();
    try {
        final PatternCompiler cp = new Perl5Compiler();
        final Pattern pattern = cp.compile(URL_PATTERN, Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.MULTILINE_MASK);
        final PatternMatcher matcher = new Perl5Matcher();
        final PatternMatcherInput input = new PatternMatcherInput(plainText);
        MatchResult result;
        String url;
        // loop the matches
        while (matcher.contains(input, pattern)) {
            // Give up once the wall-clock budget is spent. The check only runs
            // between matches, and it measures elapsed time rather than CPU
            // time, so a heavily loaded system can hit the limit sooner.
            if (System.currentTimeMillis() - start >= 60000L) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn("Time limit exceeded for getOutLinks");
                }
                break;
            }
            result = matcher.getMatch();
            url = result.group(0);
            try {
                outlinks.add(new Outlink(url, anchor));
            } catch (MalformedURLException mue) {
                // A single bad URL must not abort the extraction; skip it.
                LOG.warn("Invalid url: '" + url + "', skipping.");
            }
        }
    } catch (Exception ex) {
        // Best effort: if compiling or matching fails, log it and move on with
        // whatever has been collected.
        if (LOG.isErrorEnabled()) {
            LOG.error("getOutlinks", ex);
        }
    }
    // toArray already yields an empty array for an empty list, so no
    // null/size special-casing is needed.
    return outlinks.toArray(new Outlink[0]);
}
Also used : Perl5Compiler(org.apache.oro.text.regex.Perl5Compiler) Pattern(org.apache.oro.text.regex.Pattern) PatternCompiler(org.apache.oro.text.regex.PatternCompiler) MalformedURLException(java.net.MalformedURLException) ArrayList(java.util.ArrayList) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher) MatchResult(org.apache.oro.text.regex.MatchResult) PatternMatcherInput(org.apache.oro.text.regex.PatternMatcherInput) PatternMatcher(org.apache.oro.text.regex.PatternMatcher)

Example 18 with PatternMatcherInput

use of org.apache.oro.text.regex.PatternMatcherInput in project jmeter by apache.

the class TestHTTPSamplersAgainstHttpMirrorServer method getPositionOfBody.

/**
 * Locates the divider between headers and body in the given text.
 *
 * @param stringToCheck the text to scan
 * @return the start offset of the first match of the divider pattern, or -1 when the
 *         pattern does not match anywhere
 */
private int getPositionOfBody(String stringToCheck) {
    Perl5Matcher dividerMatcher = JMeterUtils.getMatcher();
    // The headers and body are divided by a blank line.
    // NOTE(review): in multi-line mode "^.$" matches a line holding exactly one character,
    // not an empty one - presumably the lone CR of a CRLF-terminated blank line; confirm.
    final int compileFlags = Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.MULTILINE_MASK;
    Pattern dividerPattern = JMeterUtils.getPattern("^.$", compileFlags);
    PatternMatcherInput text = new PatternMatcherInput(stringToCheck);

    if (!dividerMatcher.contains(text, dividerPattern)) {
        // No divider was found
        return -1;
    }
    // Only the first match is of interest.
    return dividerMatcher.getMatch().beginOffset(0);
}
Also used : Pattern(org.apache.oro.text.regex.Pattern) PatternMatcherInput(org.apache.oro.text.regex.PatternMatcherInput) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher) MatchResult(org.apache.oro.text.regex.MatchResult)

Example 19 with PatternMatcherInput

use of org.apache.oro.text.regex.PatternMatcherInput in project jmeter by apache.

the class RenderAsRegexp method process.

/**
 * Applies the regular expression currently held in {@code regexpField} to the given text and
 * renders every match as a multi-line report.
 *
 * <p>The report starts with {@code "Match count: N"} followed by one line per group of each
 * match, formatted as {@code Match[m][g]=text}, where {@code m} is the 1-based match number
 * and {@code g} the 0-based group index.
 *
 * @param textToParse the text to search
 * @return the report, or the {@code toString()} of the {@link MalformedCachePatternException}
 *         when the expression cannot be turned into a pattern
 */
private String process(String textToParse) {
    Perl5Matcher matcher = new Perl5Matcher();
    PatternMatcherInput input = new PatternMatcherInput(textToParse);
    PatternCacheLRU pcLRU = new PatternCacheLRU();
    Pattern pattern;
    try {
        pattern = pcLRU.getPattern(regexpField.getText(), Perl5Compiler.READ_ONLY_MASK);
    } catch (MalformedCachePatternException e) {
        // The message is shown to the user in place of the results.
        return e.toString();
    }
    List<MatchResult> matches = new LinkedList<>();
    while (matcher.contains(input, pattern)) {
        matches.add(matcher.getMatch());
    }
    // Construct a multi-line string with all matches
    StringBuilder sb = new StringBuilder();
    sb.append("Match count: ").append(matches.size()).append("\n");
    // Walk the list once with a running counter: indexed get(j) on a linked list would
    // re-traverse it for every match.
    int matchNumber = 0;
    for (MatchResult mr : matches) {
        matchNumber++;
        final int groups = mr.groups();
        for (int i = 0; i < groups; i++) {
            sb.append("Match[").append(matchNumber).append("][").append(i).append("]=").append(mr.group(i)).append("\n");
        }
    }
    return sb.toString();
}
Also used : Pattern(org.apache.oro.text.regex.Pattern) PatternMatcherInput(org.apache.oro.text.regex.PatternMatcherInput) MalformedCachePatternException(org.apache.oro.text.MalformedCachePatternException) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher) MatchResult(org.apache.oro.text.regex.MatchResult) LinkedList(java.util.LinkedList) PatternCacheLRU(org.apache.oro.text.PatternCacheLRU)

Example 20 with PatternMatcherInput

use of org.apache.oro.text.regex.PatternMatcherInput in project tdi-studio-se by Talend.

the class WebServiceExpressionParser method parseInTableEntryLocations.

/**
 * Scans the expression for every match of the location pattern and records, for each match,
 * capture group 2 as the key and capture group 1 as the value.
 *
 * <p>Uses and updates {@code matcher}, {@code patternMatcherInput} and {@code pattern}, which
 * are declared outside this method (presumably instance fields - verify).
 *
 * @param expression the expression to scan; may be {@code null}
 * @return a new map of group 2 to group 1 for every match; empty when {@code expression} is
 *         {@code null} or nothing matches. A later match with the same key overwrites an
 *         earlier one.
 */
public Map<String, String> parseInTableEntryLocations(String expression) {
    Map<String, String> locations = new HashMap<String, String>();
    if (expression == null) {
        return locations;
    }

    matcher.setMultiline(true);

    // Reuse the existing input holder when there is one; otherwise create it.
    if (patternMatcherInput != null) {
        patternMatcherInput.setInput(expression);
    } else {
        patternMatcherInput = new PatternMatcherInput(expression);
    }

    recompilePatternIfNecessary(locationPattern);

    while (matcher.contains(patternMatcherInput, pattern)) {
        MatchResult hit = matcher.getMatch();
        String key = hit.group(2);
        String value = hit.group(1);
        locations.put(key, value);
    }
    return locations;
}
Also used : PatternMatcherInput(org.apache.oro.text.regex.PatternMatcherInput) HashMap(java.util.HashMap) MatchResult(org.apache.oro.text.regex.MatchResult)

Aggregations

PatternMatcherInput (org.apache.oro.text.regex.PatternMatcherInput): 28, MatchResult (org.apache.oro.text.regex.MatchResult): 20, Pattern (org.apache.oro.text.regex.Pattern): 15, Perl5Matcher (org.apache.oro.text.regex.Perl5Matcher): 14, ArrayList (java.util.ArrayList): 9, PatternMatcher (org.apache.oro.text.regex.PatternMatcher): 5, Perl5Compiler (org.apache.oro.text.regex.Perl5Compiler): 5, MalformedURLException (java.net.MalformedURLException): 3, HashMap (java.util.HashMap): 3, MalformedCachePatternException (org.apache.oro.text.MalformedCachePatternException): 3, LinkedList (java.util.LinkedList): 2, Map (java.util.Map): 2, Array (lucee.runtime.type.Array): 2, ArrayImpl (lucee.runtime.type.ArrayImpl): 2, PatternCompiler (org.apache.oro.text.regex.PatternCompiler): 2, IOException (java.io.IOException): 1, UnsupportedEncodingException (java.io.UnsupportedEncodingException): 1, URL (java.net.URL): 1, HashSet (java.util.HashSet): 1, Struct (lucee.runtime.type.Struct): 1