Use of org.apache.oro.text.regex.Perl5Matcher in project Lucee by lucee.
Class Perl5Util, method match.
/**
 * Collects the text of every match of a Perl5 regular expression in a string.
 *
 * <p>The pattern is always compiled with {@code MULTILINE_MASK}; when
 * {@code caseSensitive} is false, {@code CASE_INSENSITIVE_MASK} is added.
 *
 * @param strPattern the Perl5 regular expression to search for
 * @param strInput the text to search
 * @param offset 1-based position at which the search starts; values below 1 are treated as 1
 * @param caseSensitive whether matching distinguishes upper and lower case
 * @return an Array containing the string form of each match, in order of occurrence;
 *         empty when nothing matches or when offset lies beyond the end of the input
 * @throws MalformedPatternException if the pattern cannot be compiled
 */
public static Array match(String strPattern, String strInput, int offset, boolean caseSensitive) throws MalformedPatternException {
    Perl5Matcher matcher = new Perl5Matcher();
    PatternMatcherInput input = new PatternMatcherInput(strInput);

    // The masks are bit flags, so combine them with OR rather than arithmetic addition.
    int compileOptions = Perl5Compiler.MULTILINE_MASK;
    if (!caseSensitive) {
        compileOptions |= Perl5Compiler.CASE_INSENSITIVE_MASK;
    }

    if (offset < 1) {
        offset = 1;
    }

    // Compile first so that a malformed pattern is reported regardless of the offset.
    Pattern pattern = getPattern(strPattern, compileOptions);
    Array rtn = new ArrayImpl();

    // A start position past the end of the input cannot produce any match.
    if (offset - 1 > strInput.length()) {
        return rtn;
    }

    // Honour the caller's 1-based offset: previously it was normalised but never
    // applied, so the search always started at the beginning of the input.
    input.setCurrentOffset(offset - 1);

    while (matcher.contains(input, pattern)) {
        rtn.appendEL(matcher.getMatch().toString());
    }
    return rtn;
}
Use of org.apache.oro.text.regex.Perl5Matcher in project nutch by apache.
Class OutlinkExtractor, method getOutlinks.
/**
 * Scans plain text for URLs and wraps each one in an <code>Outlink</code>
 * carrying the supplied anchor.
 *
 * <p>Matching stops once 60 seconds have elapsed since the call started; the
 * links collected up to that point are still returned. Matches that cannot be
 * turned into an <code>Outlink</code> are logged and skipped. Any other
 * exception is logged and ends the scan early.
 *
 * @param plainText
 *          the plain text from which URLs should be extracted.
 * @param anchor
 *          the anchor to attach to every extracted link
 * @param conf
 *          configuration; not referenced by this method
 *
 * @return array of the <code>Outlink</code>s found in plainText, never null
 */
public static Outlink[] getOutlinks(final String plainText, String anchor, Configuration conf) {
    final long startedAt = System.currentTimeMillis();
    final List<Outlink> found = new ArrayList<>();

    try {
        final PatternCompiler compiler = new Perl5Compiler();
        final int options = Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.MULTILINE_MASK;
        final Pattern urlPattern = compiler.compile(URL_PATTERN, options);
        final PatternMatcher matcher = new Perl5Matcher();
        final PatternMatcherInput remaining = new PatternMatcherInput(plainText);

        // Walk through the matches one at a time.
        while (matcher.contains(remaining, urlPattern)) {
            // Give up once the wall-clock budget for this call is exhausted.
            final long elapsed = System.currentTimeMillis() - startedAt;
            if (elapsed >= 60000L) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn("Time limit exceeded for getOutLinks");
                }
                break;
            }

            final MatchResult hit = matcher.getMatch();
            final String url = hit.group(0);
            try {
                found.add(new Outlink(url, anchor));
            } catch (MalformedURLException mue) {
                LOG.warn("Invalid url: '" + url + "', skipping.");
            }
        }
    } catch (Exception ex) {
        // Extraction is best-effort: report the failure and return what was gathered.
        if (LOG.isErrorEnabled()) {
            LOG.error("getOutlinks", ex);
        }
    }

    // An empty list converts to an empty array, so no special case is needed.
    return found.toArray(new Outlink[0]);
}
Use of org.apache.oro.text.regex.Perl5Matcher in project jspwiki by apache.
Class AbstractReferralPlugin, method filterCollection.
/**
 * Filters a collection according to the include and exclude parameters.
 *
 * <p>Each element is either a WikiPage, whose name is used for matching, or is
 * cast to a String page name. An element is kept when it matches at least one
 * include pattern (or no include patterns are set) and matches none of the
 * exclude patterns. Kept elements are added to the result unchanged.
 *
 * <p>When {@code m_lastModified} is set, {@code m_dateLastModified} is advanced
 * to the newest last-modified date among the kept pages.
 *
 * @param c The collection to filter.
 * @return A filtered collection.
 */
protected Collection filterCollection(Collection c) {
    ArrayList<Object> result = new ArrayList<Object>();
    PatternMatcher pm = new Perl5Matcher();

    for (Iterator i = c.iterator(); i.hasNext(); ) {
        Object objectje = i.next();
        String pageName;
        if (objectje instanceof WikiPage) {
            pageName = ((WikiPage) objectje).getName();
        } else {
            pageName = (String) objectje;
        }

        //
        //  If include parameter exists, then by default we include only those
        //  pages in it (excluding the ones in the exclude pattern list).
        //
        boolean includeThis = m_include == null;
        if (m_include != null) {
            for (int j = 0; j < m_include.length; j++) {
                if (pm.matches(pageName, m_include[j])) {
                    includeThis = true;
                    break;
                }
            }
        }

        // An exclude match always wins over an include match.
        if (m_exclude != null) {
            for (int j = 0; j < m_exclude.length; j++) {
                if (pm.matches(pageName, m_exclude[j])) {
                    includeThis = false;
                    break;
                }
            }
        }

        if (!includeThis) {
            continue;
        }

        // For a non-WikiPage element pageName is the element itself, so adding
        // the element covers both cases.
        result.add(objectje);

        //
        //  If we want to show the last modified date of the most recently changed
        //  page, we keep a "high watermark" here.
        //
        if (m_lastModified) {
            WikiPage page = m_engine.getPage(pageName);
            if (page != null) {
                Date lastModPage = page.getLastModified();
                if (log.isDebugEnabled()) {
                    // Report the page's own date, not the running watermark.
                    log.debug("lastModified Date of page " + pageName + " : " + lastModPage);
                }
                // Guard against a page without a modification date, which would
                // otherwise fail on the comparison below.
                if (lastModPage != null && lastModPage.after(m_dateLastModified)) {
                    m_dateLastModified = lastModPage;
                }
            }
        }
    }

    return result;
}
Use of org.apache.oro.text.regex.Perl5Matcher in project ofbiz-framework by apache.
Class RegexpCondition, method checkCondition.
/**
 * Tests whether the field value, taken as a string, matches the expanded
 * regular expression in its entirety.
 *
 * <p>A missing field value is treated as the empty string; a non-String value
 * is converted to String using the context's locale and time zone.
 *
 * @param methodContext supplies the environment map, locale and time zone
 * @return true when the field value matches the pattern, false otherwise
 * @throws MiniLangException if the value cannot be converted or the
 *         expression is not a valid pattern (raised as MiniLangRuntimeException)
 */
@Override
public boolean checkCondition(MethodContext methodContext) throws MiniLangException {
    final Object rawValue = fieldFma.get(methodContext.getEnvMap());

    // Normalise the field value; the cast to String happens only at match time.
    Object subject;
    if (rawValue == null) {
        subject = "";
    } else if (rawValue instanceof String) {
        subject = rawValue;
    } else {
        try {
            subject = MiniLangUtil.convertType(rawValue, String.class, methodContext.getLocale(), methodContext.getTimeZone(), null);
        } catch (Exception e) {
            throw new MiniLangRuntimeException(e, this);
        }
    }

    final String regExp = exprFse.expandString(methodContext.getEnvMap());
    final Pattern compiled;
    try {
        compiled = PatternFactory.createOrGetPerl5CompiledPattern(regExp, true);
    } catch (MalformedPatternException e) {
        Debug.logError(e, "Regular Expression [" + regExp + "] is mal-formed: " + e.toString(), module);
        throw new MiniLangRuntimeException(e, this);
    }

    final PatternMatcher matcher = new Perl5Matcher();
    return matcher.matches((String) subject, compiled);
}
Use of org.apache.oro.text.regex.Perl5Matcher in project ofbiz-framework by apache.
Class CatalogUrlSeoTransform, method getNiceName.
/**
 * Builds a hyphen-connected form of a name: the name is trimmed and every
 * space is replaced with {@code URL_HYPHEN}.
 *
 * <p>The result is returned only when it is non-empty and matches
 * {@code asciiPattern} in full.
 *
 * @param name a String to be transformed
 * @return the transformed name, or null when the input is empty, the
 *         transformed value is empty, or it does not match the pattern
 */
protected static String getNiceName(String name) {
    if (!UtilValidate.isNotEmpty(name)) {
        return null;
    }

    final String candidate = name.trim().replaceAll(" ", URL_HYPHEN);
    if (!UtilValidate.isNotEmpty(candidate)) {
        return null;
    }

    final Perl5Matcher matcher = new Perl5Matcher();
    return matcher.matches(candidate, asciiPattern) ? candidate : null;
}
Aggregations