use of org.apache.oro.text.regex.PatternMatcherInput in project Lucee by lucee.
the class Perl5Util method match.
/**
 * Collects every substring of {@code strInput} that matches {@code strPattern}.
 *
 * <p>The pattern is obtained from {@code getPattern} with {@code MULTILINE_MASK} set, plus
 * {@code CASE_INSENSITIVE_MASK} when {@code caseSensitive} is {@code false}.
 *
 * @param strPattern Perl5 regular expression to search for
 * @param strInput text to search
 * @param offset 1-based position at which the search starts; values below 1 are treated as 1
 * @param caseSensitive {@code true} to match case sensitively
 * @return the matched substrings in order of occurrence; empty when nothing matches or when
 *         {@code offset} lies beyond the end of {@code strInput}
 * @throws MalformedPatternException propagated from {@code getPattern}
 */
public static Array match(String strPattern, String strInput, int offset, boolean caseSensitive) throws MalformedPatternException {
    Perl5Matcher matcher = new Perl5Matcher();
    PatternMatcherInput input = new PatternMatcherInput(strInput);

    // The masks are bit flags, so combine them with a bitwise OR rather than addition.
    int compileOptions = Perl5Compiler.MULTILINE_MASK;
    if (!caseSensitive) {
        compileOptions |= Perl5Compiler.CASE_INSENSITIVE_MASK;
    }
    Pattern pattern = getPattern(strPattern, compileOptions);
    Array rtn = new ArrayImpl();

    // Convert the 1-based offset into the 0-based index the matcher input expects.
    int startIndex = Math.max(offset, 1) - 1;
    if (startIndex > strInput.length()) {
        // Nothing left to search. startIndex == length is still allowed so that
        // patterns matching the empty string behave as they do for offset 1.
        return rtn;
    }
    // Previously the offset was normalised but never applied, so the search
    // always began at the start of the input.
    input.setCurrentOffset(startIndex);

    while (matcher.contains(input, pattern)) {
        rtn.appendEL(matcher.getMatch().toString());
    }
    return rtn;
}
use of org.apache.oro.text.regex.PatternMatcherInput in project nutch by apache.
the class OutlinkExtractor method getOutlinks.
/**
 * Extracts <code>Outlink</code>s from the given plain text and attaches the
 * given anchor to each extracted <code>Outlink</code>.
 *
 * @param plainText
 *          the plain text from which URLs should be extracted.
 * @param anchor
 *          the anchor of the url
 * @param conf
 *          not read by this method
 *
 * @return Array of the <code>Outlink</code>s found in plainText; a
 *         zero-length array (never null) when none were collected
 */
public static Outlink[] getOutlinks(final String plainText, String anchor, Configuration conf) {
    long start = System.currentTimeMillis();
    final List<Outlink> outlinks = new ArrayList<>();
    try {
        final PatternCompiler cp = new Perl5Compiler();
        final Pattern pattern = cp.compile(URL_PATTERN, Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.MULTILINE_MASK);
        final PatternMatcher matcher = new Perl5Matcher();
        final PatternMatcherInput input = new PatternMatcherInput(plainText);
        MatchResult result;
        String url;
        // loop the matches
        while (matcher.contains(input, pattern)) {
            // Stop matching once 60 seconds of wall-clock time have passed since
            // the call started; the links collected so far are still returned.
            if (System.currentTimeMillis() - start >= 60000L) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn("Time limit exceeded for getOutLinks");
                }
                break;
            }
            result = matcher.getMatch();
            url = result.group(0);
            try {
                outlinks.add(new Outlink(url, anchor));
            } catch (MalformedURLException mue) {
                // A single bad URL must not abort extraction of the remaining ones.
                LOG.warn("Invalid url: '" + url + "', skipping.");
            }
        }
    } catch (Exception ex) {
        // Best effort: log the failure and return whatever was collected before it.
        if (LOG.isErrorEnabled()) {
            LOG.error("getOutlinks", ex);
        }
    }
    // toArray already yields a zero-length array for an empty list, and the
    // list itself can never be null, so no special-casing is required.
    return outlinks.toArray(new Outlink[0]);
}
use of org.apache.oro.text.regex.PatternMatcherInput in project jmeter by apache.
the class TestHTTPSamplersAgainstHttpMirrorServer method getPositionOfBody.
/**
 * Locates the first match of the divider pattern {@code "^.$"} in the given text.
 *
 * @param stringToCheck the text to search
 * @return the begin offset of the first match, or -1 when the pattern does not match
 */
private int getPositionOfBody(String stringToCheck) {
    final Perl5Matcher perl5Matcher = JMeterUtils.getMatcher();
    final int options = Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.MULTILINE_MASK;
    // The headers and body are divided by a blank line.
    // NOTE(review): "^.$" matches a line holding exactly one character, presumably
    // the CR of a CRLF-only line - confirm against the data this is run on.
    final Pattern dividerPattern = JMeterUtils.getPattern("^.$", options);
    final PatternMatcherInput haystack = new PatternMatcherInput(stringToCheck);

    if (!perl5Matcher.contains(haystack, dividerPattern)) {
        // No divider was found
        return -1;
    }
    return perl5Matcher.getMatch().beginOffset(0);
}
use of org.apache.oro.text.regex.PatternMatcherInput in project jmeter by apache.
the class RenderAsRegexp method process.
/**
 * Applies the regular expression read from {@code regexpField} to the given text
 * and renders every match, group by group, as a multi-line string.
 *
 * <p>The first line is {@code "Match count: N"}. Each following line has the form
 * {@code Match[m][g]=text}, where {@code m} is the 1-based match number and
 * {@code g} the 0-based group index.
 *
 * @param textToParse the text to run the expression against
 * @return the rendered matches, or the {@code toString()} of the
 *         {@code MalformedCachePatternException} raised while obtaining the pattern
 */
private String process(String textToParse) {
    Perl5Matcher matcher = new Perl5Matcher();
    PatternMatcherInput input = new PatternMatcherInput(textToParse);
    PatternCacheLRU pcLRU = new PatternCacheLRU();
    Pattern pattern;
    try {
        pattern = pcLRU.getPattern(regexpField.getText(), Perl5Compiler.READ_ONLY_MASK);
    } catch (MalformedCachePatternException e) {
        // Show the problem to the user instead of failing.
        return e.toString();
    }
    List<MatchResult> matches = new LinkedList<>();
    while (matcher.contains(input, pattern)) {
        matches.add(matcher.getMatch());
    }
    // Construct a multi-line string with all matches
    StringBuilder sb = new StringBuilder();
    sb.append("Match count: ").append(matches.size()).append("\n");
    // Iterate rather than calling get(j): indexed access on a LinkedList walks
    // the list on every call, which made this loop quadratic in the match count.
    int matchNumber = 0;
    for (MatchResult mr : matches) {
        matchNumber++;
        final int groups = mr.groups();
        for (int i = 0; i < groups; i++) {
            sb.append("Match[").append(matchNumber).append("][").append(i).append("]=").append(mr.group(i)).append("\n");
        }
    }
    return sb.toString();
}
use of org.apache.oro.text.regex.PatternMatcherInput in project tdi-studio-se by Talend.
the class WebServiceExpressionParser method parseInTableEntryLocations.
/**
 * Scans the expression for matches and collects capture groups from each one.
 *
 * <p>For every match, group 2 becomes the map key and group 1 the value; a
 * later match with the same key overwrites the earlier entry.
 *
 * @param expression the expression to scan; may be {@code null}
 * @return a new map of group 2 to group 1 for each match; empty when
 *         {@code expression} is {@code null} or nothing matches
 */
public Map<String, String> parseInTableEntryLocations(String expression) {
    final Map<String, String> locations = new HashMap<String, String>();
    if (expression == null) {
        return locations;
    }

    matcher.setMultiline(true);
    // Reuse the shared matcher input when one already exists.
    if (patternMatcherInput != null) {
        patternMatcherInput.setInput(expression);
    } else {
        patternMatcherInput = new PatternMatcherInput(expression);
    }
    // NOTE(review): presumably refreshes the 'pattern' field from locationPattern - confirm.
    recompilePatternIfNecessary(locationPattern);

    while (matcher.contains(patternMatcherInput, pattern)) {
        final MatchResult hit = matcher.getMatch();
        locations.put(hit.group(2), hit.group(1));
    }
    return locations;
}
Aggregations