Example use of nl.basjes.parse.core.exceptions.MissingDissectorsException in the project logparser by nielsbasjes.
From the class ApacheHttpdlogDeserializer, method deserialize.
/**
 * Parses a single input line and copies the requested fields into the shared {@code row}.
 *
 * <p>Lines that fail dissection are counted in {@code linesBad} and yield {@code null}.
 * Once at least {@code MINIMAL_FAIL_LINES} lines have been seen, the call fails when
 * more than {@code MINIMAL_FAIL_PERCENTAGE} percent of all lines were bad.
 *
 * @param writable the input line; must be a {@code Text}
 * @return the populated {@code row}, or {@code null} when this line could not be dissected
 * @throws SerDeException if the input is not a {@code Text}, if too many lines are bad,
 *                        or if the dissectors are invalid or missing
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
    if (!(writable instanceof Text)) {
        throw new SerDeException("The input MUST be a Text line.");
    }

    linesInput++;
    try {
        currentValue.clear();
        parser.parse(currentValue, writable.toString());
    } catch (DissectionFailure dissectionFailure) {
        linesBad++;
        // Only judge the failure ratio after enough lines have been seen.
        // 100L forces long arithmetic so the product cannot overflow if the counters are ints.
        if (linesInput >= MINIMAL_FAIL_LINES
                && 100L * linesBad > MINIMAL_FAIL_PERCENTAGE * linesInput) {
            throw new SerDeException("Too many bad lines: " + linesBad + " of " + linesInput + " are bad.");
        }
        // Just return that this line is nothing.
        return null;
    } catch (InvalidDissectorException | MissingDissectorsException e) {
        throw new SerDeException("Cannot continue; Fix the Dissectors before retrying", e);
    }

    // Copy every mapped column out of the parsed record using the getter that matches its cast.
    for (ColumnToGetterMapping ctgm : columnToGetterMappings) {
        switch (ctgm.casts) {
            case STRING:
                String currentValueString = currentValue.getString(ctgm.fieldValue);
                row.set(ctgm.index, currentValueString);
                break;
            case LONG:
                Long currentValueLong = currentValue.getLong(ctgm.fieldValue);
                row.set(ctgm.index, currentValueLong);
                break;
            case DOUBLE:
                Double currentValueDouble = currentValue.getDouble(ctgm.fieldValue);
                row.set(ctgm.index, currentValueDouble);
                break;
            default:
                // Any other cast leaves the column untouched.
        }
    }
    return row;
}
Example use of nl.basjes.parse.core.exceptions.MissingDissectorsException in the project logparser by nielsbasjes.
From the class Loader, method getNext.
// ------------------------------------------
/**
 * Produces the next tuple from the underlying record reader.
 *
 * <p>When an example is being built, a single example tuple is returned and the
 * example flag is cleared. Otherwise the next record is read and its requested
 * fields are collected into a tuple.
 *
 * @return the next tuple, or {@code null} when the reader has no more records
 *         or the current record is {@code null}
 * @throws IOException if the parser reports invalid or missing dissectors
 */
@Override
public Tuple getNext() throws IOException {
    try {
        if (isBuildingExample) {
            // Terminate on the next iteration
            isBuildingExample = false;
            return tupleFactory.newTuple(createPigExample());
        }

        if (!reader.nextKeyValue()) {
            return null;
        }

        final ParsedRecord record = reader.getCurrentValue();
        if (record == null) {
            return null;
        }

        final List<Object> values = new ArrayList<>();
        if (onlyWantListOfFields) {
            values.add(record.getString(ApacheHttpdLogfileRecordReader.FIELDS));
            return tupleFactory.newTuple(values);
        }

        for (String requested : requestedFields) {
            // Wildcard fields are delivered as a set of strings.
            if (requested.endsWith(".*")) {
                values.add(record.getStringSet(requested));
                continue;
            }

            // Prefer LONG over DOUBLE over the plain string representation.
            final EnumSet<Casts> supportedCasts = reader.getParser().getCasts(requested);
            if (supportedCasts != null && supportedCasts.contains(Casts.LONG)) {
                values.add(record.getLong(requested));
            } else if (supportedCasts != null && supportedCasts.contains(Casts.DOUBLE)) {
                values.add(record.getDouble(requested));
            } else {
                values.add(record.getString(requested));
            }
        }
        return tupleFactory.newTuple(values);
    } catch (InvalidDissectorException | MissingDissectorsException e) {
        throw new IOException("Fatal error in the parser", e);
    }
}
Example use of nl.basjes.parse.core.exceptions.MissingDissectorsException in the project logparser by nielsbasjes.
From the class Parser, method assembleDissectors.
/**
 * Builds the dissector tree needed to fill all requested targets.
 *
 * <p>Does nothing when the parser is already assembled. Otherwise it
 * (re)resolves the setter methods if {@code targets} is {@code null}, lets every
 * dissector register additional dissectors, computes all possibly useful
 * sub-targets, compiles the dissectors reachable from the root and prepares them
 * for the run.
 *
 * @throws MissingDissectorsException if no dissectors were compiled at all, or if
 *         {@code failOnMissingDissectors} is set and some required fields cannot be located
 * @throws InvalidDissectorException  if a setter's parameter class or the setter method
 *         itself cannot be resolved on {@code recordClass}
 */
private void assembleDissectors() throws MissingDissectorsException, InvalidDissectorException {
    if (assembled) {
        // nothing to do.
        return;
    }

    if (targets == null) {
        // This happens only AFTER deserialization.
        targets = new HashMap<>(targetsMethodNames.size());
        for (Entry<String, Set<Pair<List<String>, SetterPolicy>>> entry : targetsMethodNames.entrySet()) {
            String fieldName = entry.getKey();
            Set<Pair<List<String>, SetterPolicy>> methodSet = entry.getValue();
            // computeIfAbsent already stores the new set in targets; no separate put is needed.
            Set<Pair<Method, SetterPolicy>> fieldTargets = targets.computeIfAbsent(fieldName, k -> new HashSet<>());
            for (Pair<List<String>, SetterPolicy> methodStringPair : methodSet) {
                List<String> methodString = methodStringPair.getLeft();
                SetterPolicy setterPolicy = methodStringPair.getRight();
                Method method;
                // Element 0 is the method name; every following element is a parameter class name.
                String methodName = methodString.get(0);
                int numberOfParameters = methodString.size() - 1;
                Class<?>[] parameters = new Class<?>[numberOfParameters];
                try {
                    // Resolve however many parameters were recorded (also safe for zero).
                    for (int i = 0; i < numberOfParameters; i++) {
                        parameters[i] = Class.forName(methodString.get(i + 1));
                    }
                } catch (ClassNotFoundException e) {
                    throw new InvalidDissectorException("Unable to locate class", e);
                }
                try {
                    method = recordClass.getMethod(methodName, parameters);
                    fieldTargets.add(Pair.of(method, setterPolicy));
                } catch (NoSuchMethodException e) {
                    throw new InvalidDissectorException("Unable to locate method " + methodName, e);
                }
            }
        }
    }

    // In some cases a dissector may need to create a special 'extra' dissector,
    // and that extra dissector may in turn need one too. So we keep looping
    // until no new (not yet handled) dissectors show up in allDissectors.
    Set<Dissector> doneDissectors = new HashSet<>(allDissectors.size() + 10);
    Set<Dissector> loopDissectors = new HashSet<>(allDissectors);
    while (!loopDissectors.isEmpty()) {
        for (final Dissector dissector : loopDissectors) {
            dissector.createAdditionalDissectors(this);
        }
        doneDissectors.addAll(loopDissectors);
        loopDissectors.clear();
        loopDissectors.addAll(allDissectors);
        loopDissectors.removeAll(doneDissectors);
    }

    // So
    // - we have a set of needed values (targets)
    // - we have a set of dissectors that can pick apart some input
    // - we know where to start from
    // - we need to know how to proceed
    assembleDissectorPhases();

    // Step 1: Acquire all potentially useful subtargets
    // We first build a set of all possible subtargets that may be useful
    // this way we can skip anything we know not to be useful
    Set<String> needed = new HashSet<>(getNeeded());
    // The root name is an empty string
    needed.add(rootType + ':');
    LOG.debug("Root: >>>{}:<<<", rootType);
    Set<String> allPossibleSubtargets = new HashSet<>();
    for (String need : needed) {
        String neededName = need.substring(need.indexOf(':') + 1);
        LOG.debug("Needed : >>>{}<<<", neededName);
        String[] needs = neededName.split("\\.");
        StringBuilder sb = new StringBuilder(need.length());
        // Every dot-separated prefix of the needed name is a possible subtarget.
        for (String part : needs) {
            if (sb.length() == 0 || part.length() == 0) {
                sb.append(part);
            } else {
                sb.append('.').append(part);
            }
            allPossibleSubtargets.add(sb.toString());
            LOG.debug("Possible: >>>{}<<<", sb);
        }
    }

    // Step 2: From the root we explore all possibly useful trees (recursively)
    compiledDissectors = new HashMap<>();
    usefulIntermediateFields = new HashSet<>();
    Set<String> locatedTargets = new HashSet<>();
    // The root name is an empty string
    findUsefulDissectorsFromField(allPossibleSubtargets, locatedTargets, rootType, "", true);

    // Verify there is something to run BEFORE touching the compiled dissectors,
    // so the null guard is effective instead of coming after a dereference.
    if (compiledDissectors == null || compiledDissectors.isEmpty()) {
        throw new MissingDissectorsException("There are no dissectors at all which makes this a completely useless parser.");
    }

    // Step 3: Inform all dissectors to prepare for the run
    for (Set<DissectorPhase> dissectorPhases : compiledDissectors.values()) {
        for (DissectorPhase dissectorPhase : dissectorPhases) {
            dissectorPhase.instance.prepareForRun();
        }
    }

    if (failOnMissingDissectors) {
        // Step 4: As a final step we verify that every required input can be found
        Set<String> missingDissectors = getTheMissingFields(locatedTargets);
        if (!missingDissectors.isEmpty()) {
            StringBuilder allMissing = new StringBuilder(missingDissectors.size() * 64);
            for (String missing : missingDissectors) {
                allMissing.append('\n').append(missing);
            }
            throw new MissingDissectorsException(allMissing.toString());
        }
    }
    assembled = true;
}
Example use of nl.basjes.parse.core.exceptions.MissingDissectorsException in the project logparser by nielsbasjes.
From the class ApacheHttpdLogfileRecordReader, method addRequestedFields.
/**
 * Adds the given field names to the requested fields, rebuilds {@code fieldList}
 * from the combined set and re-runs the field setup.
 *
 * @param newRequestedFields the additional field names to request
 * @throws IOException if the field setup fails
 */
private void addRequestedFields(Set<String> newRequestedFields) throws IOException {
    requestedFields.addAll(newRequestedFields);
    fieldList = new ArrayList<>(requestedFields);

    try {
        setupFields();
    } catch (NoSuchMethodException | MissingDissectorsException | InvalidDissectorException setupFailure) {
        // Surface every setup problem to the caller as an I/O failure.
        throw new IOException("RecordReader initialization failed", setupFailure);
    }
}
Example use of nl.basjes.parse.core.exceptions.MissingDissectorsException in the project logparser by nielsbasjes.
From the class ApacheHttpdLogfileRecordReader, method initialize.
/**
 * Initializes the line reader and counters, fills in the log format and field list
 * from the job configuration where they were not set explicitly, and sets up the fields.
 *
 * @param split   the input split handed to the underlying line reader
 * @param context the task context providing the configuration and counters
 * @throws IOException if the field setup fails
 */
@Override
public void initialize(final InputSplit split, final TaskAttemptContext context) throws IOException {
    lineReader.initialize(split, context);
    final Configuration configuration = context.getConfiguration();

    counterLinesRead = context.getCounter(HTTPD_LOGFILE_INPUT_FORMAT, "1:Lines read");
    counterGoodLines = context.getCounter(HTTPD_LOGFILE_INPUT_FORMAT, "2:Good lines");
    counterBadLines = context.getCounter(HTTPD_LOGFILE_INPUT_FORMAT, "3:Bad lines");

    final boolean logformatMissing = logformat == null;
    final boolean nothingRequested = requestedFields.isEmpty();

    if (logformatMissing) {
        logformat = configuration.get("nl.basjes.parse.apachehttpdlogline.format", "common");
    }
    if (nothingRequested) {
        // No fields requested in code: fall back to the comma separated list in the configuration.
        final String configuredFields = configuration.get("nl.basjes.parse.apachehttpdlogline.fields", null);
        if (configuredFields != null) {
            fieldList = Arrays.asList(configuredFields.split(","));
        }
    } else if (logformatMissing) {
        // Fields were requested but the log format was not yet known.
        fieldList = new ArrayList<>(requestedFields);
    }

    if (fieldList != null) {
        if (parser == null && logformat != null) {
            parser = createParser();
        }
        for (String requestedField : fieldList) {
            currentValue.declareRequestedFieldname(requestedField);
        }
    }

    try {
        setupFields();
    } catch (NoSuchMethodException | MissingDissectorsException | InvalidDissectorException setupFailure) {
        throw new IOException("RecordReader initialization failed", setupFailure);
    }
}
Aggregations