Search in sources :

Example 46 with MatchResult

use of java.util.regex.MatchResult in project openchemlib by Actelion.

the class StringFunctions method match.

/**
 * @param str
 * @param regex
 * @return list with points, x start, y end of matching string (offset after the last character matched).
 */
public static final List<Point> match(String str, String regex) {
    Pattern pa = Pattern.compile(regex);
    Matcher ma = pa.matcher(str);
    List<Point> li = new ArrayList<Point>();
    while (ma.find()) {
        MatchResult mr = ma.toMatchResult();
        int start = mr.start();
        int end = mr.end();
        li.add(new Point(start, end));
    }
    return li;
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) Point(java.awt.Point) MatchResult(java.util.regex.MatchResult) Point(java.awt.Point)

Example 47 with MatchResult

use of java.util.regex.MatchResult in project hive by apache.

the class UDFRegExpExtract method evaluate.

public String evaluate(String s, String regex, Integer extractIndex) {
    if (s == null || regex == null) {
        return null;
    }
    if (!regex.equals(lastRegex) || p == null) {
        lastRegex = regex;
        p = Pattern.compile(regex);
    }
    Matcher m = p.matcher(s);
    if (m.find()) {
        MatchResult mr = m.toMatchResult();
        return mr.group(extractIndex);
    }
    return "";
}
Also used : Matcher(java.util.regex.Matcher) MatchResult(java.util.regex.MatchResult)

Example 48 with MatchResult

use of java.util.regex.MatchResult in project jena by apache.

the class ParameterizedSparqlString method validateSafeToInject.

/**
 * Helper method which checks whether it is safe to inject to a variable
 * parameter the given value
 *
 * @param command
 *            Current command string
 * @param var
 *            Variable
 * @param n
 *            Value to inject
 * @throws ARQException
 *             Thrown if not safe to inject, error message will describe why
 *             it is unsafe to inject
 */
protected void validateSafeToInject(String command, String var, Node n) throws ARQException {
    // Looks for the known injection attack vectors and throws an error if
    // any are encountered
    // A ?var surrounded by " or ' where the variable is a literal is an
    // attack vector
    Pattern p = Pattern.compile("\"[?$]" + var + "\"|'[?$]" + var + "'");
    if (p.matcher(command).find() && n.isLiteral()) {
        throw new ARQException("Command string is vunerable to injection attack, variable ?" + var + " appears surrounded directly by quotes and is bound to a literal which provides a SPARQL injection attack vector");
    }
    // Parse out delimiter info
    DelimiterInfo delims = this.findDelimiters(command);
    // Check each occurrence of the variable for safety
    p = Pattern.compile("([?$]" + var + ")([^\\w]|$)");
    Matcher matcher = p.matcher(command);
    while (matcher.find()) {
        MatchResult posMatch = matcher.toMatchResult();
        if (n.isLiteral()) {
            if (delims.isInsideLiteral(posMatch.start(1), posMatch.end(1))) {
                throw new ARQException("Command string is vunerable to injection attack, variable ?" + var + " appears inside of a literal and is bound to a literal which provides a SPARQL injection attack vector");
            }
        }
    }
}
Also used : Pattern(java.util.regex.Pattern) ARQException(org.apache.jena.sparql.ARQException) Matcher(java.util.regex.Matcher) MatchResult(java.util.regex.MatchResult)

Example 49 with MatchResult

use of java.util.regex.MatchResult in project AutoRefactor by JnRouvignac.

the class OptimizeRegExCleanUp method maybeRewriteRegEx.

private boolean maybeRewriteRegEx(final StringLiteral visited) {
    String pattern = visited.getLiteralValue();
    if (COMMENT_PATTERN.matcher(pattern).find()) {
        return true;
    }
    if (QUESTION_DOT_PATTERN.matcher(pattern).find()) {
        // $NON-NLS-1$
        pattern = QUESTION_DOT_PATTERN.matcher(pattern).replaceAll("?");
    }
    if (START_PATTERN.matcher(pattern).find()) {
        // $NON-NLS-1$
        pattern = START_PATTERN.matcher(pattern).replaceAll("*");
    }
    if (PLUS_PATTERN.matcher(pattern).find()) {
        // $NON-NLS-1$
        pattern = PLUS_PATTERN.matcher(pattern).replaceAll("+");
    }
    if (DIGIT_PATTERN.matcher(pattern).find()) {
        // $NON-NLS-1$
        pattern = DIGIT_PATTERN.matcher(pattern).replaceAll("\\\\d");
    }
    if (SIMPLE_DUPLICATE_WITHOUT_REPETITOR_PATTERN.matcher(pattern).find()) {
        // $NON-NLS-1$
        pattern = SIMPLE_DUPLICATE_WITHOUT_REPETITOR_PATTERN.matcher(pattern).replaceAll("$1{2}");
    }
    if (DUPLICATE_WITHOUT_REPETITOR_PATTERN.matcher(pattern).find()) {
        // $NON-NLS-1$
        pattern = DUPLICATE_WITHOUT_REPETITOR_PATTERN.matcher(pattern).replaceAll("(?:$1){2}");
    }
    try {
        while (DUPLICATE_WITHOUT_REPETITOR_PATTERN.matcher(pattern).find()) {
            MatchResult matchResult = DUPLICATE_WITHOUT_REPETITOR_PATTERN.matcher(pattern).toMatchResult();
            // TODO Correctly handle repetition enclosing
            String pattern2;
            if ("*".equals(matchResult.group(2))) {
                // $NON-NLS-1$
                // $NON-NLS-1$
                pattern2 = DUPLICATE_WITHOUT_REPETITOR_PATTERN.matcher(pattern).replaceAll("(?:$1)+");
            } else if ("+".equals(matchResult.group(2))) {
                // $NON-NLS-1$
                // $NON-NLS-1$
                pattern2 = DUPLICATE_WITHOUT_REPETITOR_PATTERN.matcher(pattern).replaceAll("(?:$1){2,}");
            } else if ("?".equals(matchResult.group(2))) {
                // $NON-NLS-1$
                // $NON-NLS-1$
                pattern2 = DUPLICATE_WITHOUT_REPETITOR_PATTERN.matcher(pattern).replaceAll("(?:$1){1,2}");
            }
            break;
        }
    } catch (Exception e) {
        e.getMessage();
    }
    if (!Utils.equalNotNull(visited.getLiteralValue(), pattern)) {
        rewriteRegEx(visited, pattern);
        return false;
    }
    return true;
}
Also used : MatchResult(java.util.regex.MatchResult)

Example 50 with MatchResult

use of java.util.regex.MatchResult in project vcell by virtualcell.

the class ListservMail method readMultiPart.

private static void readMultiPart(Multipart mp, int partNum, int size) throws Exception {
    for (int j = 0; j < mp.getCount(); j++) {
        Part part = mp.getBodyPart(j);
        // Enumeration<Header> enumHeaders = part.getAllHeaders();
        // while(enumHeaders.hasMoreElements()){
        // Header h = enumHeaders.nextElement();
        // System.out.println(h.getName()+" "+h.getValue());
        // }
        Object obj = null;
        try {
            obj = part.getContent();
        } catch (UnsupportedEncodingException uce) {
            obj = getStringFromStream(part.getInputStream(), part.getSize());
        }
        if ((obj instanceof IMAPNestedMessage)) {
            readMessage(((IMAPMessage) obj), j, part.getSize());
        } else if (obj instanceof IMAPInputStream) {
            String s = getStringFromStream(((IMAPInputStream) obj), part.getSize());
            System.out.println("stream content= " + s.length() + " " + size + " " + s.contains("@") + " disp=" + part.getDisposition() + " type=" + part.getContentType() + " descr=" + part.getDescription());
        // DataInputStream dis = new DataInputStream(((IMAPInputStream)obj));
        // byte[] bytes = new byte[size];
        // dis.readFully(bytes);
        // String s  = new String(bytes);
        // System.out.println("stream content= "+s.length()+" "+size+" "+s.contains("@")+" disp="+part.getDisposition()+" type="+part.getContentType()+" descr="+part.getDescription());
        // dis.close();
        } else if ((obj instanceof Multipart)) {
            readMultiPart(((Multipart) obj), j, part.getSize());
        } else if ((obj instanceof String)) {
            String s = (String) obj;
            Matcher matcher = pattern.matcher(s);
            while (matcher != null && matcher.find()) {
                MatchResult matchResult = matcher.toMatchResult();
                String email = matchResult.group();
                System.out.println(email);
                bouncedEMails.add(email);
            }
            System.out.println("string content= " + s.length() + " " + size + " " + s.contains("@") + " disp=" + part.getDisposition() + " type=" + part.getContentType() + " descr=" + part.getDescription());
        } else {
            System.out.println("-----TBI part " + partNum + "\n" + obj.getClass().getName());
        }
    }
}
Also used : Multipart(javax.mail.Multipart) Matcher(java.util.regex.Matcher) Part(javax.mail.Part) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IMAPMessage(com.sun.mail.imap.IMAPMessage) IMAPInputStream(com.sun.mail.imap.IMAPInputStream) MatchResult(java.util.regex.MatchResult) IMAPNestedMessage(com.sun.mail.imap.IMAPNestedMessage)

Aggregations

MatchResult (java.util.regex.MatchResult)62 Matcher (java.util.regex.Matcher)26 Pattern (java.util.regex.Pattern)16 Scanner (java.util.Scanner)11 Point (java.awt.Point)5 Test (org.junit.Test)5 ArrayList (java.util.ArrayList)4 IOException (java.io.IOException)3 NoSuchElementException (java.util.NoSuchElementException)3 XMLStreamException (javax.xml.stream.XMLStreamException)3 MatcherState (com.github.anba.es6draft.regexp.MatcherState)2 ArrayObject (com.github.anba.es6draft.runtime.types.builtins.ArrayObject)2 InputStream (java.io.InputStream)2 Fault (org.apache.cxf.interceptor.Fault)2 CachedOutputStream (org.apache.cxf.io.CachedOutputStream)2 IterableMatchResult (com.github.anba.es6draft.regexp.IterableMatchResult)1 MatcherResult (com.github.anba.es6draft.regexp.MatcherResult)1 ScriptObject (com.github.anba.es6draft.runtime.types.ScriptObject)1 OrdinaryObject (com.github.anba.es6draft.runtime.types.builtins.OrdinaryObject)1 ImmutableMap (com.google.common.collect.ImmutableMap)1