Search in sources :

Example 16 with Perl5Matcher

use of org.apache.oro.text.regex.Perl5Matcher in project Lucee by lucee.

the class Perl5Util method match.

/**
 * Collects every match of a regular expression found in a string.
 *
 * The pattern is always compiled with {@code Perl5Compiler.MULTILINE_MASK};
 * {@code Perl5Compiler.CASE_INSENSITIVE_MASK} is added when
 * {@code caseSensitive} is {@code false}.
 *
 * @param strPattern the regular expression to search for
 * @param strInput the text to search in
 * @param offset 1-based position at which searching starts; values below 1
 *          are treated as 1
 * @param caseSensitive whether matching should distinguish upper and lower case
 * @return an array holding the text of each match in the order found; empty
 *         when nothing matches or when the offset lies beyond the input
 * @throws MalformedPatternException declared by this method; presumably raised
 *           by {@code getPattern} when the pattern cannot be compiled
 */
public static Array match(String strPattern, String strInput, int offset, boolean caseSensitive) throws MalformedPatternException {
    Perl5Matcher matcher = new Perl5Matcher();
    PatternMatcherInput input = new PatternMatcherInput(strInput);
    // The masks are bit flags, so combine them with OR rather than addition.
    int compileOptions = Perl5Compiler.MULTILINE_MASK;
    if (!caseSensitive) {
        compileOptions |= Perl5Compiler.CASE_INSENSITIVE_MASK;
    }
    if (offset < 1) {
        offset = 1;
    }
    Pattern pattern = getPattern(strPattern, compileOptions);
    Array rtn = new ArrayImpl();
    // Convert the 1-based offset to the 0-based index used by the matcher input.
    int startIndex = offset - 1;
    if (startIndex > strInput.length()) {
        // Starting past the end of the input: there is nothing left to search.
        return rtn;
    }
    // Previously the offset was normalised but never applied, so the search
    // always began at the start of the input.
    input.setCurrentOffset(startIndex);
    while (matcher.contains(input, pattern)) {
        MatchResult result = matcher.getMatch();
        rtn.appendEL(result.toString());
    }
    return rtn;
}
Also used : Array(lucee.runtime.type.Array) Pattern(org.apache.oro.text.regex.Pattern) PatternMatcherInput(org.apache.oro.text.regex.PatternMatcherInput) ArrayImpl(lucee.runtime.type.ArrayImpl) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher) MatchResult(org.apache.oro.text.regex.MatchResult)

Example 17 with Perl5Matcher

use of org.apache.oro.text.regex.Perl5Matcher in project nutch by apache.

the class OutlinkExtractor method getOutlinks.

/**
 * Scans plain text for URLs and wraps each one found in an
 * <code>Outlink</code> carrying the supplied anchor.
 *
 * Scanning stops early, with a warning, once 60 seconds have elapsed since
 * the call began. Matches that cannot be turned into an <code>Outlink</code>
 * because of a malformed URL are logged and skipped. Any other exception is
 * logged and the links collected so far are returned.
 *
 * @param plainText
 *          the plain text from which URLs should be extracted.
 * @param anchor
 *          the anchor to attach to every extracted <code>Outlink</code>
 * @param conf
 *          configuration object; not read by this method
 *
 * @return array of the <code>Outlink</code>s found in plainText, empty when
 *         none were found
 */
public static Outlink[] getOutlinks(final String plainText, String anchor, Configuration conf) {
    final long startedAt = System.currentTimeMillis();
    final List<Outlink> collected = new ArrayList<>();
    try {
        final int compileFlags = Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.MULTILINE_MASK;
        final PatternCompiler compiler = new Perl5Compiler();
        final Pattern urlPattern = compiler.compile(URL_PATTERN, compileFlags);
        final PatternMatcher urlMatcher = new Perl5Matcher();
        final PatternMatcherInput text = new PatternMatcherInput(plainText);
        // Walk through the matches one at a time.
        while (urlMatcher.contains(text, urlPattern)) {
            // Give up once the time budget for this call is spent.
            final long elapsedMillis = System.currentTimeMillis() - startedAt;
            if (elapsedMillis >= 60000L) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn("Time limit exceeded for getOutLinks");
                }
                break;
            }
            // Group 0 is the complete matched text.
            final String candidate = urlMatcher.getMatch().group(0);
            try {
                collected.add(new Outlink(candidate, anchor));
            } catch (MalformedURLException mue) {
                LOG.warn("Invalid url: '" + candidate + "', skipping.");
            }
        }
    } catch (Exception ex) {
        // Log the failure and fall through to return what was gathered.
        if (LOG.isErrorEnabled()) {
            LOG.error("getOutlinks", ex);
        }
    }
    // An empty list yields an empty array, so no separate branch is needed.
    return collected.toArray(new Outlink[0]);
}
Also used : Perl5Compiler(org.apache.oro.text.regex.Perl5Compiler) Pattern(org.apache.oro.text.regex.Pattern) PatternCompiler(org.apache.oro.text.regex.PatternCompiler) MalformedURLException(java.net.MalformedURLException) ArrayList(java.util.ArrayList) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher) MatchResult(org.apache.oro.text.regex.MatchResult) PatternMatcherInput(org.apache.oro.text.regex.PatternMatcherInput) PatternMatcher(org.apache.oro.text.regex.PatternMatcher)

Example 18 with Perl5Matcher

use of org.apache.oro.text.regex.Perl5Matcher in project jspwiki by apache.

the class AbstractReferralPlugin method filterCollection.

/**
 *  Filters a collection according to the include and exclude parameters.
 *
 *  Each element is treated as a page: a <code>WikiPage</code> contributes its
 *  name, anything else is cast to <code>String</code> and used as the name.
 *  An element is kept when its name matches at least one include pattern (or
 *  when no include patterns are set) and matches none of the exclude patterns.
 *
 *  When <code>m_lastModified</code> is set, the method also raises
 *  <code>m_dateLastModified</code> to the latest modification date among the
 *  kept pages that the engine can resolve.
 *
 *  @param c The collection to filter.
 *  @return A filtered collection.
 */
protected Collection filterCollection(Collection c) {
    ArrayList<Object> result = new ArrayList<Object>();
    PatternMatcher pm = new Perl5Matcher();
    for (Iterator i = c.iterator(); i.hasNext(); ) {
        String pageName = null;
        Object objectje = i.next();
        if (objectje instanceof WikiPage) {
            pageName = ((WikiPage) objectje).getName();
        } else {
            pageName = (String) objectje;
        }
        //
        // If include parameter exists, then by default we include only those
        // pages in it (excluding the ones in the exclude pattern list).
        //
        // With no include patterns at all, every page starts out included.
        //
        boolean includeThis = m_include == null;
        if (m_include != null) {
            for (int j = 0; j < m_include.length; j++) {
                if (pm.matches(pageName, m_include[j])) {
                    includeThis = true;
                    break;
                }
            }
        }
        // Exclude patterns are checked last, so they override any include match.
        if (m_exclude != null) {
            for (int j = 0; j < m_exclude.length; j++) {
                if (pm.matches(pageName, m_exclude[j])) {
                    includeThis = false;
                    // One exclude match is enough; stop checking the rest.
                    break;
                }
            }
        }
        if (includeThis) {
            if (objectje instanceof WikiPage) {
                result.add(objectje);
            } else {
                result.add(pageName);
            }
            //
            // if we want to show the last modified date of the most recently changed page, we keep a "high watermark" here:
            WikiPage page = null;
            if (m_lastModified) {
                page = m_engine.getPage(pageName);
                if (page != null) {
                    Date lastModPage = page.getLastModified();
                    if (log.isDebugEnabled()) {
                        // Log the page's own date; the previous code printed the running watermark instead.
                        log.debug("lastModified Date of page " + pageName + " : " + lastModPage);
                    }
                    // A page without a modification date cannot move the watermark
                    // and must not trigger a NullPointerException.
                    if (lastModPage != null && lastModPage.after(m_dateLastModified)) {
                        m_dateLastModified = lastModPage;
                    }
                }
            }
        }
    }
    return result;
}
Also used : WikiPage(org.apache.wiki.WikiPage) ArrayList(java.util.ArrayList) Iterator(java.util.Iterator) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher) PatternMatcher(org.apache.oro.text.regex.PatternMatcher) Date(java.util.Date)

Example 19 with Perl5Matcher

use of org.apache.oro.text.regex.Perl5Matcher in project ofbiz-framework by apache.

the class RegexpCondition method checkCondition.

/**
 * Tests the configured field's value against the configured regular expression.
 *
 * A missing field value is treated as the empty string; a non-String value is
 * converted to a String first using the context's locale and time zone.
 *
 * @param methodContext the context supplying the environment map, locale and time zone
 * @return {@code true} when the whole field value matches the expression
 * @throws MiniLangException declared by the overridden method; this
 *           implementation throws {@code MiniLangRuntimeException} when the
 *           value cannot be converted or the expression is malformed
 */
@Override
public boolean checkCondition(MethodContext methodContext) throws MiniLangException {
    Object subject = fieldFma.get(methodContext.getEnvMap());
    if (subject == null) {
        subject = "";
    } else if (!(subject instanceof String)) {
        try {
            subject = MiniLangUtil.convertType(subject, String.class, methodContext.getLocale(), methodContext.getTimeZone(), null);
        } catch (Exception e) {
            throw new MiniLangRuntimeException(e, this);
        }
    }
    final String regExp = exprFse.expandString(methodContext.getEnvMap());
    final Pattern compiled;
    try {
        compiled = PatternFactory.createOrGetPerl5CompiledPattern(regExp, true);
    } catch (MalformedPatternException e) {
        Debug.logError(e, "Regular Expression [" + regExp + "] is mal-formed: " + e.toString(), module);
        throw new MiniLangRuntimeException(e, this);
    }
    // The cast happens only here, after the pattern has been compiled.
    return new Perl5Matcher().matches((String) subject, compiled);
}
Also used : Pattern(org.apache.oro.text.regex.Pattern) MiniLangRuntimeException(org.apache.ofbiz.minilang.MiniLangRuntimeException) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher) MalformedPatternException(org.apache.oro.text.regex.MalformedPatternException) PatternMatcher(org.apache.oro.text.regex.PatternMatcher) MiniLangException(org.apache.ofbiz.minilang.MiniLangException)

Example 20 with Perl5Matcher

use of org.apache.oro.text.regex.Perl5Matcher in project ofbiz-framework by apache.

the class CatalogUrlSeoTransform method getNiceName.

/**
 * Builds a URL-friendly form of a name by trimming it and replacing each
 * space with <code>URL_HYPHEN</code>.
 *
 * @param name a String to be transformed
 * @return the transformed name when it is non-empty and matches
 *         <code>asciiPattern</code>; <code>null</code> otherwise
 */
protected static String getNiceName(String name) {
    final Perl5Matcher asciiMatcher = new Perl5Matcher();
    if (!UtilValidate.isNotEmpty(name)) {
        return null;
    }
    final String hyphenated = name.trim().replaceAll(" ", URL_HYPHEN);
    // Trimming can leave nothing behind, so emptiness is checked again.
    if (!UtilValidate.isNotEmpty(hyphenated)) {
        return null;
    }
    return asciiMatcher.matches(hyphenated, asciiPattern) ? hyphenated : null;
}
Also used : Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher)

Aggregations

Perl5Matcher (org.apache.oro.text.regex.Perl5Matcher)72 Pattern (org.apache.oro.text.regex.Pattern)50 Perl5Compiler (org.apache.oro.text.regex.Perl5Compiler)23 MalformedPatternException (org.apache.oro.text.regex.MalformedPatternException)22 MatchResult (org.apache.oro.text.regex.MatchResult)21 PatternMatcher (org.apache.oro.text.regex.PatternMatcher)17 PatternMatcherInput (org.apache.oro.text.regex.PatternMatcherInput)14 ArrayList (java.util.ArrayList)12 PatternCompiler (org.apache.oro.text.regex.PatternCompiler)8 IOException (java.io.IOException)6 MalformedURLException (java.net.MalformedURLException)6 MalformedCachePatternException (org.apache.oro.text.MalformedCachePatternException)5 Perl5Substitution (org.apache.oro.text.regex.Perl5Substitution)5 UnsupportedEncodingException (java.io.UnsupportedEncodingException)3 SampleResult (org.apache.jmeter.samplers.SampleResult)3 JMeterProperty (org.apache.jmeter.testelement.property.JMeterProperty)3 PatternCacheLRU (org.apache.oro.text.PatternCacheLRU)3 BufferedReader (java.io.BufferedReader)2 EOFException (java.io.EOFException)2 File (java.io.File)2