Search in sources :

Example 51 with Perl5Matcher

use of org.apache.oro.text.regex.Perl5Matcher in project tdi-studio-se by Talend.

the class VarsTable method validateColumnName.

/**
 * Validates a candidate column name for this table.
 *
 * <p>The name is rejected when it is {@code null}, when it does not fully match
 * {@code VALID_PATTERN_COLUMN_NAME}, or when an entry of {@code dataMapTableEntries} at an index
 * other than {@code beanPosition} already has the same name.
 *
 * @param columnName the name to check; may be {@code null}
 * @param beanPosition index in {@code dataMapTableEntries} that is skipped by the duplicate check
 * (presumably the position of the entry being renamed - confirm against callers)
 * @return {@code null} if the name is acceptable, otherwise a message describing why it was rejected
 * @throws RuntimeException wrapping the {@link MalformedPatternException} if
 * {@code VALID_PATTERN_COLUMN_NAME} cannot be compiled
 */
public String validateColumnName(String columnName, int beanPosition) {
    if (columnName == null) {
        //$NON-NLS-1$
        return Messages.getString("VarsTable.columnNameIsNull");
    }
    // The pattern is compiled on every call; the previous "== null" guard around this was always true.
    final Pattern validPatternColumnNameRegexp;
    try {
        validPatternColumnNameRegexp = COMPILER.compile(VALID_PATTERN_COLUMN_NAME);
    } catch (MalformedPatternException e) {
        throw new RuntimeException(e);
    }
    Perl5Matcher matcher = new Perl5Matcher();
    // matches() requires the whole name to match, not just a substring.
    boolean match = matcher.matches(columnName, validPatternColumnNameRegexp);
    if (!match) {
        //$NON-NLS-1$ //$NON-NLS-2$
        return Messages.getString("VarsTable.columnNameTip") + columnName + Messages.getString("VarsTable.invalidTip");
    }
    int lstSize = dataMapTableEntries.size();
    for (int i = 0; i < lstSize; i++) {
        // The entry at beanPosition is allowed to keep its own name.
        if (i != beanPosition && columnName.equals(dataMapTableEntries.get(i).getName())) {
            //$NON-NLS-1$ //$NON-NLS-2$
            return Messages.getString("VarsTable.columnNameTip") + columnName + Messages.getString("VarsTable.existTip");
        }
    }
    return null;
}
Also used : Pattern(org.apache.oro.text.regex.Pattern) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher) MalformedPatternException(org.apache.oro.text.regex.MalformedPatternException)

Example 52 with Perl5Matcher

use of org.apache.oro.text.regex.Perl5Matcher in project Lucee by lucee.

the class Perl5Util method indexOf.

/**
 * Returns the 1-based index of the first occurrence of a pattern in the input text.
 *
 * @param strPattern pattern to search for
 * @param strInput text to search in
 * @param offset 1-based position at which the search starts; values below 1 are treated as 1
 * @param caseSensitive {@code false} to compile the pattern with the case-insensitive mask
 * @return 1-based position of the first match, or 0 if there is no match or {@code offset} lies
 * beyond the end of {@code strInput}
 * @throws MalformedPatternException if the pattern cannot be compiled
 */
public static int indexOf(String strPattern, String strInput, int offset, boolean caseSensitive) throws MalformedPatternException {
    PatternMatcherInput matcherInput = new PatternMatcherInput(strInput);

    int options = Perl5Compiler.SINGLELINE_MASK;
    if (!caseSensitive) {
        options += Perl5Compiler.CASE_INSENSITIVE_MASK;
    }
    final int start = Math.max(offset, 1);

    // The pattern is resolved before the range check so that an invalid pattern is always reported.
    Pattern compiled = getPattern(strPattern, options);
    if (start > strInput.length()) {
        return 0;
    }

    // The matcher works with 0-based offsets.
    matcherInput.setCurrentOffset(start - 1);
    Perl5Matcher perl = new Perl5Matcher();
    if (!perl.contains(matcherInput, compiled)) {
        return 0;
    }
    return perl.getMatch().beginOffset(0) + 1;
}
Also used : Pattern(org.apache.oro.text.regex.Pattern) PatternMatcherInput(org.apache.oro.text.regex.PatternMatcherInput) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher)

Example 53 with Perl5Matcher

use of org.apache.oro.text.regex.Perl5Matcher in project Lucee by lucee.

the class Perl5Util method find.

/**
 * Finds the first occurrence of a pattern in a string, like {@code indexOf}, but instead of a
 * single index it returns a struct describing the whole match and each of its groups.
 *
 * <p>The struct has two keys, {@code "pos"} and {@code "len"}, each holding an array with one
 * element per group reported by the match (group 0 first). Positions are 1-based. When there is
 * no match, or {@code offset} lies beyond the end of {@code strInput}, both arrays contain the
 * single value 0.
 *
 * @param strPattern pattern to search for
 * @param strInput text to search in
 * @param offset 1-based position at which the search starts; values below 1 are treated as 1
 * @param caseSensitive {@code false} to compile the pattern with the case-insensitive mask
 * @return struct with the keys {@code "pos"} and {@code "len"} as described above
 * @throws MalformedPatternException if the pattern cannot be compiled
 */
public static Struct find(String strPattern, String strInput, int offset, boolean caseSensitive) throws MalformedPatternException {
    PatternMatcherInput matcherInput = new PatternMatcherInput(strInput);

    int options = Perl5Compiler.SINGLELINE_MASK;
    if (!caseSensitive) {
        options += Perl5Compiler.CASE_INSENSITIVE_MASK;
    }
    final int start = offset < 1 ? 1 : offset;

    // The pattern is resolved before the range check so that an invalid pattern is always reported.
    Pattern compiled = getPattern(strPattern, options);

    Array positions = new ArrayImpl();
    Array lengths = new ArrayImpl();
    boolean found = false;

    if (start <= strInput.length()) {
        // The matcher works with 0-based offsets.
        matcherInput.setCurrentOffset(start - 1);
        Perl5Matcher perl = new Perl5Matcher();
        if (perl.contains(matcherInput, compiled)) {
            found = true;
            MatchResult match = perl.getMatch();
            for (int group = 0, total = match.groups(); group < total; group++) {
                int begin = match.beginOffset(group);
                positions.appendEL(Integer.valueOf(begin + 1));
                lengths.appendEL(Integer.valueOf(match.endOffset(group) - begin));
            }
        }
    }

    if (!found) {
        // No match: report a single zero position and zero length.
        positions.appendEL(Constants.INTEGER_0);
        lengths.appendEL(Constants.INTEGER_0);
    }

    Struct info = new StructImpl();
    info.setEL("pos", positions);
    info.setEL("len", lengths);
    return info;
}
Also used : Array(lucee.runtime.type.Array) Pattern(org.apache.oro.text.regex.Pattern) StructImpl(lucee.runtime.type.StructImpl) PatternMatcherInput(org.apache.oro.text.regex.PatternMatcherInput) ArrayImpl(lucee.runtime.type.ArrayImpl) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher) MatchResult(org.apache.oro.text.regex.MatchResult) Struct(lucee.runtime.type.Struct)

Example 54 with Perl5Matcher

use of org.apache.oro.text.regex.Perl5Matcher in project Lucee by lucee.

the class Perl5Util method _matches.

/**
 * Checks whether the entire input matches the given pattern.
 *
 * <p>The pattern is compiled with a new {@link Perl5Compiler} using {@code DEFAULT_MASK} on every call.
 *
 * @param strPattern pattern to compile
 * @param strInput text that must match the pattern
 * @return the result of {@code Perl5Matcher.matches} for the input and the compiled pattern
 * @throws MalformedPatternException if the pattern cannot be compiled
 */
private static boolean _matches(String strPattern, String strInput) throws MalformedPatternException {
    Perl5Compiler compiler = new Perl5Compiler();
    Pattern compiled = compiler.compile(strPattern, Perl5Compiler.DEFAULT_MASK);
    Perl5Matcher perl = new Perl5Matcher();
    return perl.matches(new PatternMatcherInput(strInput), compiled);
}
Also used : Pattern(org.apache.oro.text.regex.Pattern) Perl5Compiler(org.apache.oro.text.regex.Perl5Compiler) PatternMatcherInput(org.apache.oro.text.regex.PatternMatcherInput) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher)

Example 55 with Perl5Matcher

use of org.apache.oro.text.regex.Perl5Matcher in project nutch by apache.

the class JSParseFilter method getJSLinks.

// Alternative pattern, which limits valid url characters.
// private static final String URI_PATTERN =
// "(^|\\s*?)[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2})+[/.](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2})+(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]*))?($|\\s*)";
/**
 * Extracts URLs from string literals embedded in JavaScript.
 *
 * <p>Every match of {@code STRING_PATTERN} in {@code plainText} is taken as a candidate; its
 * group 2 is kept only if it fully matches {@code URI_PATTERN}. Candidates starting with
 * {@code "www."} are prefixed with {@code "http://"}; all others are resolved against
 * {@code base}. Failures are logged and never propagated to the caller.
 *
 * @param plainText JavaScript text to scan
 * @param anchor anchor text attached to every produced outlink
 * @param base URL used to resolve candidates; if it cannot be parsed, the error is logged and
 * resolution is attempted with a {@code null} base
 * @return the outlinks found, possibly empty, never {@code null}
 */
private Outlink[] getJSLinks(String plainText, String anchor, String base) {
    final List<Outlink> outlinks = new ArrayList<Outlink>();
    URL baseURL = null;
    try {
        baseURL = new URL(base);
    } catch (Exception e) {
        // Best effort: continue with a null base URL instead of giving up on the whole text.
        if (LOG.isErrorEnabled()) {
            LOG.error("getJSLinks", e);
        }
    }
    try {
        final PatternCompiler cp = new Perl5Compiler();
        final Pattern pattern = cp.compile(STRING_PATTERN, Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.MULTILINE_MASK);
        final Pattern pattern1 = cp.compile(URI_PATTERN, Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.MULTILINE_MASK);
        final PatternMatcher matcher = new Perl5Matcher();
        final PatternMatcher matcher1 = new Perl5Matcher();
        final PatternMatcherInput input = new PatternMatcherInput(plainText);
        MatchResult result;
        String url;
        // Loop over every string-literal match in the text.
        while (matcher.contains(input, pattern)) {
            result = matcher.getMatch();
            // NOTE(review): assumes group 2 of STRING_PATTERN is the literal's content - confirm.
            url = result.group(2);
            PatternMatcherInput input1 = new PatternMatcherInput(url);
            if (!matcher1.matches(input1, pattern1)) {
                // The literal does not look like a URI; skip it.
                continue;
            }
            if (url.startsWith("www.")) {
                url = "http://" + url;
            } else {
                // Resolve against the base URL; if that fails, skip this candidate and
                // continue with the next match.
                try {
                    url = new URL(baseURL, url).toString();
                } catch (MalformedURLException ex) {
                    if (LOG.isTraceEnabled()) {
                        LOG.trace(" - failed URL parse '" + url + "' and baseURL '" + baseURL + "'", ex);
                    }
                    continue;
                }
            }
            // Literal (non-regex) replacement of the HTML-escaped ampersand.
            url = url.replace("&amp;", "&");
            if (LOG.isTraceEnabled()) {
                LOG.trace(" - outlink from JS: '" + url + "'");
            }
            outlinks.add(new Outlink(url, anchor));
        }
    } catch (Exception ex) {
        // Any unexpected failure ends the extraction; outlinks collected so far are still returned.
        if (LOG.isErrorEnabled()) {
            LOG.error("getJSLinks", ex);
        }
    }
    // toArray yields an empty array for an empty list, so no special case is needed.
    return outlinks.toArray(new Outlink[0]);
}
Also used : Outlink(org.apache.nutch.parse.Outlink) Perl5Compiler(org.apache.oro.text.regex.Perl5Compiler) Pattern(org.apache.oro.text.regex.Pattern) PatternCompiler(org.apache.oro.text.regex.PatternCompiler) MalformedURLException(java.net.MalformedURLException) ArrayList(java.util.ArrayList) Perl5Matcher(org.apache.oro.text.regex.Perl5Matcher) MatchResult(org.apache.oro.text.regex.MatchResult) URL(java.net.URL) MalformedURLException(java.net.MalformedURLException) PatternMatcherInput(org.apache.oro.text.regex.PatternMatcherInput) PatternMatcher(org.apache.oro.text.regex.PatternMatcher)

Aggregations

Perl5Matcher (org.apache.oro.text.regex.Perl5Matcher)55 Pattern (org.apache.oro.text.regex.Pattern)42 Perl5Compiler (org.apache.oro.text.regex.Perl5Compiler)22 MalformedPatternException (org.apache.oro.text.regex.MalformedPatternException)19 MatchResult (org.apache.oro.text.regex.MatchResult)19 PatternMatcherInput (org.apache.oro.text.regex.PatternMatcherInput)14 PatternMatcher (org.apache.oro.text.regex.PatternMatcher)11 ArrayList (java.util.ArrayList)10 PatternCompiler (org.apache.oro.text.regex.PatternCompiler)8 MalformedURLException (java.net.MalformedURLException)6 MalformedCachePatternException (org.apache.oro.text.MalformedCachePatternException)5 Perl5Substitution (org.apache.oro.text.regex.Perl5Substitution)5 IOException (java.io.IOException)4 UnsupportedEncodingException (java.io.UnsupportedEncodingException)3 URL (java.net.URL)3 SampleResult (org.apache.jmeter.samplers.SampleResult)3 JMeterProperty (org.apache.jmeter.testelement.property.JMeterProperty)3 PatternCacheLRU (org.apache.oro.text.PatternCacheLRU)3 BufferedReader (java.io.BufferedReader)2 EOFException (java.io.EOFException)2