Use of org.apache.oro.text.regex.PatternMatcher in the project tdi-studio-se by Talend.
Class NodeQueryCheckUtil, method compareNodeTableColumnsWithFunc.
/**
 * Checks whether the number of comma-separated entries in {@code columns} equals the number of
 * columns in the node's first metadata table.
 *
 * <p>
 * Before counting, every match of {@code FUNC_SPLIT} is replaced by a single placeholder token.
 * If the counts still differ, the text is matched against {@code SQL_FUNC_REGX}; when group 4 of
 * that match itself contains a comma, it is replaced by a single placeholder and the check is
 * repeated recursively on the shortened text.
 *
 * @param node node whose first metadata table supplies the expected column count
 * @param columns comma-separated column expressions to check
 * @return {@code true} if the node has no metadata table, the first table (or its column list) is
 * {@code null}, or the counts match; {@code false} otherwise, including when
 * {@code SQL_FUNC_REGX} cannot be compiled
 */
private static boolean compareNodeTableColumnsWithFunc(Node node, String columns) {
    // Nothing to compare against: treat as matching.
    if (node.getMetadataList().size() == 0) {
        return true;
    }
    IMetadataTable metaTable = node.getMetadataList().get(0);
    if (metaTable == null || metaTable.getListColumns() == null) {
        return true;
    }
    int originColumnSize = metaTable.getListColumns().size();

    // modified by wzhang. replace the field to one String if it contains function
    String[] columnArray = columns.replaceAll(FUNC_SPLIT, "column").split(","); //$NON-NLS-1$ //$NON-NLS-2$
    if (columnArray.length == originColumnSize) {
        return true;
    }

    // The counts differ: try to match the columns with function.
    try {
        PatternCompiler pc = new Perl5Compiler();
        org.apache.oro.text.regex.Pattern pattern = pc.compile(SQL_FUNC_REGX, REGX_FLAG);
        PatternMatcher columnMatcher = new Perl5Matcher();
        if (!columnMatcher.matches(columns, pattern)) {
            return false;
        }
        // group(...) yields null when the group did not take part in the match, so the null
        // check has to happen before trim() rather than after it.
        // NOTE(review): group 4 is presumably the function's argument list -- confirm against
        // the definition of SQL_FUNC_REGX.
        String matchedGroup = columnMatcher.getMatch().group(4);
        if (matchedGroup == null) {
            return false;
        }
        String columnWithFunc = matchedGroup.trim();
        if (columnWithFunc.split(",").length > 1) { //$NON-NLS-1$
            // Collapse the comma-containing group into one token and re-check.
            String collapsed = columns.replace(columnWithFunc, "columnWithFunction"); //$NON-NLS-1$
            return compareNodeTableColumnsWithFunc(node, collapsed);
        }
    } catch (MalformedPatternException e) {
        return false;
    }
    return false;
}
Use of org.apache.oro.text.regex.PatternMatcher in the project nutch by Apache.
Class JSParseFilter, method getJSLinks.
// Alternative pattern, which limits valid url characters.
// private static final String URI_PATTERN =
// "(^|\\s*?)[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2})+[/.](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2})+(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]*))?($|\\s*)";
/**
 * Extracts URLs from literals embedded in JavaScript.
 *
 * <p>
 * Every match of {@code STRING_PATTERN} in {@code plainText} yields a candidate (group 2 of the
 * match). Candidates that do not fully match {@code URI_PATTERN} are skipped. A candidate
 * starting with {@code "www."} is prefixed with {@code "http://"}; any other candidate is
 * resolved against {@code base} and skipped if that fails. HTML-escaped ampersands
 * ({@code &amp;}) in the resulting URL are unescaped before the outlink is recorded.
 *
 * @param plainText text to scan for URL literals
 * @param anchor anchor text passed to every created {@link Outlink}
 * @param base base URL used to resolve candidates; if it cannot be parsed the error is logged
 * and resolution proceeds without a base
 * @return the outlinks found; an empty array if there are none
 */
private Outlink[] getJSLinks(String plainText, String anchor, String base) {
    final List<Outlink> outlinks = new ArrayList<Outlink>();

    URL baseURL = null;
    try {
        baseURL = new URL(base);
    } catch (Exception e) {
        if (LOG.isErrorEnabled()) {
            LOG.error("getJSLinks", e);
        }
    }

    try {
        final int flags = Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK
                | Perl5Compiler.MULTILINE_MASK;
        final PatternCompiler cp = new Perl5Compiler();
        final Pattern pattern = cp.compile(STRING_PATTERN, flags);
        final Pattern pattern1 = cp.compile(URI_PATTERN, flags);
        final PatternMatcher matcher = new Perl5Matcher();
        final PatternMatcher matcher1 = new Perl5Matcher();
        final PatternMatcherInput input = new PatternMatcherInput(plainText);

        // loop the matches
        while (matcher.contains(input, pattern)) {
            final MatchResult result = matcher.getMatch();
            String url = result.group(2);

            // Skip literals that are not a complete URI match.
            if (!matcher1.matches(new PatternMatcherInput(url), pattern1)) {
                continue;
            }

            if (url.startsWith("www.")) {
                url = "http://" + url;
            } else {
                // See if the candidate is parseable relative to the base; if not, move on to
                // the next match.
                try {
                    url = new URL(baseURL, url).toString();
                } catch (MalformedURLException ex) {
                    if (LOG.isTraceEnabled()) {
                        LOG.trace(" - failed URL parse '" + url + "' and baseURL '" + baseURL + "'", ex);
                    }
                    continue;
                }
            }

            // Unescape HTML-encoded ampersands. The previous code replaced "&" with "&",
            // which had no effect.
            url = url.replace("&amp;", "&");
            if (LOG.isTraceEnabled()) {
                LOG.trace(" - outlink from JS: '" + url + "'");
            }
            outlinks.add(new Outlink(url, anchor));
        }
    } catch (Exception ex) {
        // Any failure ends the extraction; outlinks collected so far are still returned.
        if (LOG.isErrorEnabled()) {
            LOG.error("getJSLinks", ex);
        }
    }

    // create array of the Outlinks (zero-length when nothing was found)
    return outlinks.toArray(new Outlink[0]);
}
Aggregations