use of io.thekraken.grok.api.Match in project nifi by apache.
the class ExtractGrok method onTrigger.
/**
 * Matches the leading portion of a FlowFile's content against the compiled Grok expression and
 * routes the FlowFile according to the outcome.
 *
 * <p>At most one buffer's worth of content is read (the buffer is taken from {@code bufferQueue}
 * or, if none is available, allocated with the size given by {@code MAX_BUFFER_SIZE}) and decoded
 * with the charset named by {@code CHARACTER_SET}. When the match yields no captures the FlowFile
 * goes to {@code REL_NO_MATCH}. Otherwise, depending on {@code DESTINATION}, the captures are
 * either added as {@code grok.*} attributes or serialized as JSON into the FlowFile content, and
 * the FlowFile goes to {@code REL_MATCH}.
 *
 * @param context supplies the processor property values
 * @param session used to read, update and transfer the FlowFile
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final StopWatch stopWatch = new StopWatch(true);
    final Charset charset = Charset.forName(context.getProperty(CHARACTER_SET).getValue());
    final String contentString;

    // Reuse a pooled buffer when one is available; otherwise allocate a new one.
    byte[] buffer = bufferQueue.poll();
    if (buffer == null) {
        final int maxBufferSize = context.getProperty(MAX_BUFFER_SIZE).asDataSize(DataUnit.B).intValue();
        buffer = new byte[maxBufferSize];
    }

    try {
        final byte[] byteBuffer = buffer;
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream in) throws IOException {
                StreamUtils.fillBuffer(in, byteBuffer, false);
            }
        });
        // Only the bytes actually belonging to this FlowFile are decoded; the rest of the
        // (possibly reused) buffer may hold stale data.
        final long len = Math.min(byteBuffer.length, flowFile.getSize());
        contentString = new String(byteBuffer, 0, (int) len, charset);
    } finally {
        // Always hand the buffer back so that later invocations can reuse it.
        bufferQueue.offer(buffer);
    }

    final Match gm = grok.match(contentString);
    gm.captures();
    // Fetch the captures once instead of calling toMap() at every use site.
    final Map<String, Object> captures = gm.toMap();

    if (captures.isEmpty()) {
        session.transfer(flowFile, REL_NO_MATCH);
        getLogger().info("Did not match any Grok Expressions for FlowFile {}", new Object[] { flowFile });
        return;
    }

    switch (context.getProperty(DESTINATION).getValue()) {
        case FLOWFILE_ATTRIBUTE: {
            final Map<String, String> grokResults = new HashMap<>();
            for (Map.Entry<String, Object> entry : captures.entrySet()) {
                // Captures without a value are not turned into attributes.
                if (null != entry.getValue()) {
                    grokResults.put("grok." + entry.getKey(), entry.getValue().toString());
                }
            }
            flowFile = session.putAllAttributes(flowFile, grokResults);
            session.getProvenanceReporter().modifyAttributes(flowFile);
            session.transfer(flowFile, REL_MATCH);
            getLogger().info("Matched {} Grok Expressions and added attributes to FlowFile {}", new Object[] { grokResults.size(), flowFile });
            break;
        }
        case FLOWFILE_CONTENT: {
            // The JSON mapper is only needed on this path, so it is not created for the
            // attribute destination.
            final ObjectMapper objectMapper = new ObjectMapper();
            FlowFile conFlowfile = session.write(flowFile, new StreamCallback() {
                @Override
                public void process(InputStream in, OutputStream out) throws IOException {
                    out.write(objectMapper.writeValueAsBytes(captures));
                }
            });
            conFlowfile = session.putAttribute(conFlowfile, CoreAttributes.MIME_TYPE.key(), APPLICATION_JSON);
            session.getProvenanceReporter().modifyContent(conFlowfile, "Replaced content with parsed Grok fields and values", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
            session.transfer(conFlowfile, REL_MATCH);
            break;
        }
    }
}
use of io.thekraken.grok.api.Match in project nifi by apache.
the class GrokRecordReader method nextRecord.
/**
 * Produces the next record from the underlying reader, or {@code null} once the reader is
 * exhausted.
 *
 * <p>A record starts at a line whose Grok match yields a non-empty capture map. Following lines
 * that do not match are either consumed as a stack trace or, when {@code append} is set,
 * collected as trailing text. The first following line that does match is kept in
 * {@code nextMap}/{@code nextLine} for the next call.
 *
 * @param coerceTypes passed through unchanged to {@code createRecord}
 * @param dropUnknownFields passed through unchanged to {@code createRecord}
 * @return the record built by {@code createRecord}, or {@code null} when no further line can be read
 * @throws IOException declared by the overridden method; the reader calls may raise it
 * @throws MalformedRecordException declared by the overridden method
 */
@Override
public Record nextRecord(final boolean coerceTypes, final boolean dropUnknownFields) throws IOException, MalformedRecordException {
// Start from the capture map look-ahead left by the previous call, if any, and clear it.
Map<String, Object> valueMap = nextMap;
nextMap = null;
// Accumulates the text that is handed to createRecord as the raw form of the record.
StringBuilder raw = new StringBuilder();
int iterations = 0;
// Keep reading until a line produces at least one capture.
while (valueMap == null || valueMap.isEmpty()) {
iterations++;
final String line = nextLine == null ? reader.readLine() : nextLine;
// NOTE(review): the line is appended before it is known to match, so lines skipped by this
// loop also end up in raw, with no separator between them - confirm this is intended.
raw.append(line);
// ensure that we don't process nextLine again
nextLine = null;
if (line == null) {
// End of input: there is no further record.
return null;
}
final Match match = grok.match(line);
match.captures();
valueMap = match.toMap();
}
// iterations == 0 means the loop above never ran, i.e. valueMap came from nextMap.
// Presumably nextLine then still holds the line that produced that map - verify against
// the look-ahead loop below and readStackTrace.
if (iterations == 0 && nextLine != null) {
raw.append(nextLine);
}
// Read the next line to see if it matches the pattern (in which case we will simply leave it for
// the next call to nextRecord()) or we will attach it to the previously read record.
String stackTrace = null;
final StringBuilder trailingText = new StringBuilder();
while ((nextLine = reader.readLine()) != null) {
final Match nextLineMatch = grok.match(nextLine);
nextLineMatch.captures();
final Map<String, Object> nextValueMap = nextLineMatch.toMap();
if (nextValueMap.isEmpty()) {
// The next line did not match the pattern. If it starts a stack trace, read the stack trace
// and stop looking ahead. Otherwise, when append is set, attach the line to the record as
// trailing text; when it is not set the line is dropped.
if (isStartOfStackTrace(nextLine)) {
stackTrace = readStackTrace(nextLine);
raw.append("\n").append(stackTrace);
break;
} else if (append) {
trailingText.append("\n").append(nextLine);
raw.append("\n").append(nextLine);
}
} else {
// The next line matched our pattern.
// Keep its captures for the next call; nextLine still holds its text.
nextMap = nextValueMap;
break;
}
}
final Record record = createRecord(valueMap, trailingText, stackTrace, raw.toString(), coerceTypes, dropUnknownFields);
return record;
}
use of io.thekraken.grok.api.Match in project incubator-gobblin by apache.
the class GrokToJsonConverter method createOutput.
/**
 * Converts one input line into a JSON record shaped by the given output schema.
 *
 * <p>The line is matched with Grok and the captures are parsed as JSON. For every schema entry
 * the captured value is copied into the result using the entry's declared type; values of an
 * unrecognised type are copied as strings. A field reported as null by {@code isFieldNull}
 * becomes a JSON null when the schema marks it nullable.
 *
 * @param outputSchema array of schema entries, each naming a column, its data type and nullability
 * @param inputRecord the raw text to match
 * @return the converted record
 * @throws DataConversionException if a non-nullable field is null or absent in the captures
 */
@VisibleForTesting
JsonObject createOutput(JsonArray outputSchema, String inputRecord) throws DataConversionException {
    final JsonObject result = new JsonObject();

    final Match match = grok.match(inputRecord);
    match.captures();
    final JsonElement captured = JSON_PARSER.parse(match.toJson());

    for (JsonElement schemaEntry : outputSchema) {
        final JsonObject column = schemaEntry.getAsJsonObject();
        final String key = column.get(COLUMN_NAME_KEY).getAsString();
        final String typeName = column.getAsJsonObject(DATA_TYPE).get(TYPE_KEY).getAsString();

        if (isFieldNull(captured, key)) {
            final boolean nullable = column.get(NULLABLE).getAsBoolean();
            if (!nullable) {
                throw new DataConversionException("Field " + key + " is null or not exists but it is non-nullable by the schema.");
            }
            result.add(key, JsonNull.INSTANCE);
            continue;
        }

        final JsonElement value = captured.getAsJsonObject().get(key);
        if ("int".equals(typeName)) {
            result.addProperty(key, value.getAsInt());
        } else if ("long".equals(typeName)) {
            result.addProperty(key, value.getAsLong());
        } else if ("double".equals(typeName)) {
            result.addProperty(key, value.getAsDouble());
        } else if ("float".equals(typeName)) {
            result.addProperty(key, value.getAsFloat());
        } else if ("boolean".equals(typeName)) {
            result.addProperty(key, value.getAsBoolean());
        } else {
            // "string" and every unrecognised type are emitted as strings.
            result.addProperty(key, value.getAsString());
        }
    }
    return result;
}
use of io.thekraken.grok.api.Match in project LogHub by fbacchella.
the class TestGrok method TestLoadPatterns8.
// Pins the behaviour reported in https://github.com/thekrakken/java-grok/issues/64: the value
// that Grok exposes for "clientid" through toMap() differs from the raw regex group.
// This test will fail once that issue is fixed.
@Test
public void TestLoadPatterns8() throws GrokException {
    String pattern = "(?<message>client id): (?<clientid>.*)";
    String expectedClientId = "\"name\" \"Mac OS X Mail\" \"version\" \"10.2 (3259)\" \"os\" \"Mac OS X\" \"os-version\" \"10.12.3 (16D32)\" \"vendor\" \"Apple Inc.\"";
    String input = "client id: " + expectedClientId;

    // Validate with plain java.util.regex that the pattern itself is good. The match is asserted
    // rather than guarded by an if, so a non-matching pattern cannot silently skip the check.
    Matcher m = Pattern.compile(pattern).matcher(input);
    Assert.assertTrue("pattern should match the whole input", m.matches());
    Assert.assertEquals(expectedClientId, m.group("clientid"));

    io.thekraken.grok.api.Grok grok = new io.thekraken.grok.api.Grok();
    grok.compile(pattern, false);
    Match gm = grok.match(input);
    gm.captures();
    // Deliberately asserts inequality: see the issue referenced above.
    Assert.assertNotEquals(gm.toMap().get("clientid"), gm.getMatch().group("clientid"));
}
use of io.thekraken.grok.api.Match in project LogHub by fbacchella.
the class Grok method processMessage.
/**
 * Applies the Grok pattern to the text of one event field and copies the captures into the event.
 *
 * <p>A capture named {@code "."} is written back to the source field. Captures whose name is
 * entirely upper case and captures without a value are skipped. List values are reduced to the
 * string forms of their non-null elements; a single remaining element is stored directly and an
 * empty list is skipped.
 *
 * @param event the event to read from and update
 * @param field the name of the field whose string form is matched
 * @param destination not used by this implementation
 * @return {@code true} if the pattern matched, {@code false} if the field is absent or the match is null
 */
@Override
public boolean processMessage(Event event, String field, String destination) {
    if (!event.containsKey(field)) {
        return false;
    }

    Match match = grok.match(event.get(field).toString());
    match.captures();
    if (match.isNull()) {
        return false;
    }

    // Results from grok needs to be cleaned
    for (Map.Entry<String, Object> capture : match.toMap().entrySet()) {
        String name = capture.getKey();
        Object value = capture.getValue();
        // . is a special field name, it mean a value to put back in the original field
        boolean targetsSourceField = ".".equals(name);

        // Needed until https://github.com/thekrakken/java-grok/issues/61 is fixed
        if (!targetsSourceField && name.equals(name.toUpperCase())) {
            continue;
        }
        if (value == null) {
            continue;
        }

        String target = targetsSourceField ? field : name;
        if (!(value instanceof List)) {
            event.put(target, value);
            continue;
        }

        // Keep only the non-null elements, converted to strings.
        List<String> strings = new ArrayList<>();
        for (Object item : (List<?>) value) {
            if (item != null) {
                strings.add(item.toString());
            }
        }
        if (strings.size() == 1) {
            event.put(target, strings.get(0));
        } else if (strings.size() > 1) {
            event.put(target, strings);
        }
    }
    return true;
}
Aggregations