Search in sources :

Example 6 with RegexSubField

use of com.nextdoor.bender.operation.substitution.regex.RegexSubstitutionConfig.RegexSubField in project bender by Nextdoor.

the class RegexSubstitution method getRegexMatches.

/**
 * Locates the first source field whose value matches the configured pattern and pulls the
 * configured named groups out of that match.
 *
 * @param devent event whose fields are probed, in the order given by {@code srcFields}
 * @return pair holding the name of the source field that matched and a map from each configured
 *         key to its coerced group value; groups that are null in the match are left out
 * @throws FieldNotFoundException when no source field is both present and matching, or when a
 *         group configured as NUMBER cannot be parsed as a number
 */
private Pair<String, Map<String, Object>> getRegexMatches(DeserializedEvent devent) throws FieldNotFoundException {
    String matchedSource = null;
    Matcher m = null;
    for (String candidate : this.srcFields) {
        String candidateValue;
        try {
            candidateValue = devent.getFieldAsString(candidate);
        } catch (FieldNotFoundException ignored) {
            // This source field is absent from the event; move on to the next candidate.
            continue;
        }
        m = pattern.matcher(candidateValue);
        if (!m.find()) {
            continue;
        }
        // Remember which field produced the match; it is handed back to the caller.
        matchedSource = candidate;
        break;
    }
    if (matchedSource == null) {
        throw new FieldNotFoundException("unable to find field in: " + this.srcFields);
    }

    // Walk the configured fields and coerce each matched group to its declared type. A group
    // that is null in the match is skipped; a NUMBER group that fails to parse aborts the call.
    Map<String, Object> extracted = new HashMap<>(m.groupCount());
    try {
        for (RegexSubField field : this.fields) {
            String groupText = m.group(field.getRegexGroupName());
            if (groupText == null) {
                continue;
            }
            final Object coerced;
            switch (field.getType()) {
                case BOOLEAN:
                    coerced = Boolean.parseBoolean(groupText);
                    break;
                case NUMBER:
                    coerced = NumberUtils.createNumber(groupText);
                    break;
                case STRING:
                default:
                    // Any type other than BOOLEAN or NUMBER is stored as the raw matched text.
                    coerced = groupText;
                    break;
            }
            extracted.put(field.getKey(), coerced);
        }
    } catch (NumberFormatException e) {
        throw new FieldNotFoundException("matched field is not a number");
    }
    return new ImmutablePair<String, Map<String, Object>>(matchedSource, extracted);
}
Also used : RegexSubField(com.nextdoor.bender.operation.substitution.regex.RegexSubstitutionConfig.RegexSubField) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) Matcher(java.util.regex.Matcher) HashMap(java.util.HashMap) FieldNotFoundException(com.nextdoor.bender.deserializer.FieldNotFoundException)

Example 7 with RegexSubField

use of com.nextdoor.bender.operation.substitution.regex.RegexSubstitutionConfig.RegexSubField in project bender by Nextdoor.

the class RegexSubstitutionTest method testRegexSrcNotFoundFail.

/**
 * Neither source field value contains a digit, so the digit pattern matches nothing and the
 * operation is expected to fail with an {@link OperationException}.
 */
@Test(expected = OperationException.class)
public void testRegexSrcNotFoundFail() throws FieldNotFoundException {
    // Event with two candidate source fields, both without digits.
    DummpyMapEvent event = new DummpyMapEvent();
    event.setField("foo", "aaa");
    event.setField("foo1", "bbb");
    InternalEvent wrapper = new InternalEvent("", null, 0);
    wrapper.setEventObj(event);

    // Substitution that extracts the named group "q" (one or more digits) as a string.
    RegexSubField digitsField = new RegexSubField("q", RegexSubField.RegexSubFieldType.STRING, "q");
    List<RegexSubField> groupFields = Arrays.asList(digitsField);
    Pattern digits = Pattern.compile("(?<q>(\\d+))");
    RegexSubstitution substitution = new RegexSubstitution(Arrays.asList("foo", "foo1"), digits, groupFields, false, true, true);
    ArrayList<Substitution> subs = new ArrayList<Substitution>();
    subs.add(substitution);

    new SubstitutionOperation(subs).perform(wrapper);
}
Also used : RegexSubField(com.nextdoor.bender.operation.substitution.regex.RegexSubstitutionConfig.RegexSubField) RegexSubstitution(com.nextdoor.bender.operation.substitution.regex.RegexSubstitution) RegexSubstitution(com.nextdoor.bender.operation.substitution.regex.RegexSubstitution) ArrayList(java.util.ArrayList) DummpyMapEvent(com.nextdoor.bender.testutils.DummyDeserializerHelper.DummpyMapEvent) InternalEvent(com.nextdoor.bender.InternalEvent) Test(org.junit.Test)

Example 8 with RegexSubField

use of com.nextdoor.bender.operation.substitution.regex.RegexSubstitutionConfig.RegexSubField in project bender by Nextdoor.

the class RegexSubstitutionTest method testRegexFieldCoercionSrcNotFound.

/**
 * The pattern matches the source value, but the matched text is not numeric while the field is
 * declared as NUMBER; the operation is expected to fail with an {@link OperationException}.
 */
@Test(expected = OperationException.class)
public void testRegexFieldCoercionSrcNotFound() throws FieldNotFoundException {
    // Event whose only source field holds exactly the text the pattern looks for.
    DummpyMapEvent event = new DummpyMapEvent();
    event.setField("foo", "expectedstring");
    InternalEvent wrapper = new InternalEvent("", null, 0);
    wrapper.setEventObj(event);

    // Group "q" captures non-numeric text yet is configured to be coerced to a number.
    RegexSubField numericField = new RegexSubField("q", RegexSubField.RegexSubFieldType.NUMBER, "q");
    List<RegexSubField> groupFields = Arrays.asList(numericField);
    Pattern literal = Pattern.compile("(?<q>(expectedstring))");
    RegexSubstitution substitution = new RegexSubstitution(Arrays.asList("foo"), literal, groupFields, false, true, true);
    ArrayList<Substitution> subs = new ArrayList<Substitution>();
    subs.add(substitution);

    new SubstitutionOperation(subs).perform(wrapper);
}
Also used : RegexSubField(com.nextdoor.bender.operation.substitution.regex.RegexSubstitutionConfig.RegexSubField) RegexSubstitution(com.nextdoor.bender.operation.substitution.regex.RegexSubstitution) RegexSubstitution(com.nextdoor.bender.operation.substitution.regex.RegexSubstitution) ArrayList(java.util.ArrayList) DummpyMapEvent(com.nextdoor.bender.testutils.DummyDeserializerHelper.DummpyMapEvent) InternalEvent(com.nextdoor.bender.InternalEvent) Test(org.junit.Test)

Example 9 with RegexSubField

use of com.nextdoor.bender.operation.substitution.regex.RegexSubstitutionConfig.RegexSubField in project bender by Nextdoor.

the class RegexSubstitutionTest method testRegexMultipleSourcesNonMatch.

/**
 * With two source fields where only the second contains digits, the extracted group "q" is
 * expected to hold the digits from the second field.
 */
@Test
public void testRegexMultipleSourcesNonMatch() throws FieldNotFoundException {
    // "foo" has no digits; "foo1" does.
    DummpyMapEvent event = new DummpyMapEvent();
    event.setField("foo", "aaa");
    event.setField("foo1", "123");
    InternalEvent wrapper = new InternalEvent("", null, 0);
    wrapper.setEventObj(event);

    // Substitution that extracts the named group "q" (one or more digits) as a string.
    RegexSubField digitsField = new RegexSubField("q", RegexSubField.RegexSubFieldType.STRING, "q");
    List<RegexSubField> groupFields = Arrays.asList(digitsField);
    Pattern digits = Pattern.compile("(?<q>(\\d+))");
    RegexSubstitution substitution = new RegexSubstitution(Arrays.asList("foo", "foo1"), digits, groupFields, false, true, true);
    ArrayList<Substitution> subs = new ArrayList<Substitution>();
    subs.add(substitution);

    new SubstitutionOperation(subs).perform(wrapper);

    // Three payload entries are expected after the operation, including the new "q" field.
    assertEquals(3, event.payload.size());
    assertEquals("123", event.getField("q"));
}
Also used : RegexSubField(com.nextdoor.bender.operation.substitution.regex.RegexSubstitutionConfig.RegexSubField) RegexSubstitution(com.nextdoor.bender.operation.substitution.regex.RegexSubstitution) RegexSubstitution(com.nextdoor.bender.operation.substitution.regex.RegexSubstitution) ArrayList(java.util.ArrayList) DummpyMapEvent(com.nextdoor.bender.testutils.DummyDeserializerHelper.DummpyMapEvent) InternalEvent(com.nextdoor.bender.InternalEvent) Test(org.junit.Test)

Example 10 with RegexSubField

use of com.nextdoor.bender.operation.substitution.regex.RegexSubstitutionConfig.RegexSubField in project bender by Nextdoor.

the class RegexSubstitutionTest method testBasicRegex.

/**
 * Splits a full URL into protocol, host, port, path, page and query-argument fields using one
 * pattern with named groups, and checks each extracted value.
 */
@Test
public void testBasicRegex() throws FieldNotFoundException {
    String url = "https://www.example.com:443/p1/p2/index.html?q=abc";

    // One entry per named group: (group name, target type, destination field).
    List<RegexSubField> groupFields = Arrays.asList(
        new RegexSubField("protocol", RegexSubField.RegexSubFieldType.STRING, "http_protocol"),
        new RegexSubField("host", RegexSubField.RegexSubFieldType.STRING, "http_host"),
        new RegexSubField("port", RegexSubField.RegexSubFieldType.NUMBER, "http_port"),
        new RegexSubField("path", RegexSubField.RegexSubFieldType.STRING, "http_path"),
        new RegexSubField("page", RegexSubField.RegexSubFieldType.STRING, "http_page"),
        new RegexSubField("args", RegexSubField.RegexSubFieldType.STRING, "http_args"));

    // The URL pattern, one fragment per named group.
    String urlRegex = "(?:(?<protocol>http[s]):\\/\\/)?"
        + "(?<host>((?:www.)?(?:[^\\W\\s]|\\.|-)+[\\.][^\\W\\s]{2,4}|localhost(?=\\/)|\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}))"
        + "(?::(?<port>\\d*))?"
        + "(?<path>([\\/]?[^\\s\\?]*[\\/]{1})*)"
        + "(?<page>(?:\\/?([^\\s\\n\\?\\[\\]\\{\\}\\#]*(?:(?=\\.)){1}|[^\\s\\n\\?\\[\\]\\{\\}\\.\\#]*)?)([\\.]{1}[^\\s\\?\\#]*)?)?"
        + "(?<args>(?:\\?{1}([^\\s\\n\\#\\[\\]]*))?)";

    RegexSubstitution substitution = new RegexSubstitution(Arrays.asList("url"), Pattern.compile(urlRegex), groupFields, false, true, true);
    ArrayList<Substitution> subs = new ArrayList<Substitution>();
    subs.add(substitution);

    DummpyMapEvent event = new DummpyMapEvent();
    event.setField("url", url);
    InternalEvent wrapper = new InternalEvent("", null, 0);
    wrapper.setEventObj(event);

    new SubstitutionOperation(subs).perform(wrapper);

    // The source field is expected to remain, alongside one new field per extracted group.
    assertEquals(url, event.getField("url"));
    assertEquals("https", event.getField("http_protocol"));
    assertEquals("www.example.com", event.getField("http_host"));
    assertEquals(443, event.getField("http_port"));
    assertEquals("/p1/p2/", event.getField("http_path"));
    assertEquals("index.html", event.getField("http_page"));
    assertEquals("?q=abc", event.getField("http_args"));
}
Also used : RegexSubField(com.nextdoor.bender.operation.substitution.regex.RegexSubstitutionConfig.RegexSubField) RegexSubstitution(com.nextdoor.bender.operation.substitution.regex.RegexSubstitution) RegexSubstitution(com.nextdoor.bender.operation.substitution.regex.RegexSubstitution) ArrayList(java.util.ArrayList) DummpyMapEvent(com.nextdoor.bender.testutils.DummyDeserializerHelper.DummpyMapEvent) InternalEvent(com.nextdoor.bender.InternalEvent) Test(org.junit.Test)

Aggregations

RegexSubField (com.nextdoor.bender.operation.substitution.regex.RegexSubstitutionConfig.RegexSubField): 12, InternalEvent (com.nextdoor.bender.InternalEvent): 11, RegexSubstitution (com.nextdoor.bender.operation.substitution.regex.RegexSubstitution): 11, DummpyMapEvent (com.nextdoor.bender.testutils.DummyDeserializerHelper.DummpyMapEvent): 11, ArrayList (java.util.ArrayList): 11, Test (org.junit.Test): 11, FieldNotFoundException (com.nextdoor.bender.deserializer.FieldNotFoundException): 1, HashMap (java.util.HashMap): 1, Matcher (java.util.regex.Matcher): 1, ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair): 1