use of org.tribuo.anomaly.Event in project ml-commons by opensearch-project.
the class AnomalyDetectionLibSVM method train.
/**
 * Trains a one-class LibSVM anomaly detection model on the given data frame.
 *
 * <p>Gamma and nu fall back to {@code DEFAULT_GAMMA} and {@code DEFAULT_NU} when they are
 * not configured. Cost, coeff, epsilon and degree are only pushed into the SVM parameters
 * when a value is configured; otherwise the corresponding setter is not called.
 *
 * @param dataFrame training features, converted to a Tribuo dataset through
 *                  {@code TribuoUtil.generateDataset}
 * @return a model named after {@code FunctionName.AD_LIBSVM}, carrying {@code VERSION} and
 *         the serialized trained LibSVM model as its content
 */
@Override
public Model train(DataFrame dataFrame) {
    KernelType kernelType = parseKernelType();
    // Parameterized with Event so the compiler checks the trainer/model wiring below.
    SVMParameters<Event> params = new SVMParameters<>(new SVMAnomalyType(SVMAnomalyType.SVMMode.ONE_CLASS), kernelType);
    Double gamma = Optional.ofNullable(parameters.getGamma()).orElse(DEFAULT_GAMMA);
    Double nu = Optional.ofNullable(parameters.getNu()).orElse(DEFAULT_NU);
    params.setGamma(gamma);
    params.setNu(nu);
    // Optional tuning knobs: only override what the caller explicitly configured.
    if (parameters.getCost() != null) {
        params.setCost(parameters.getCost());
    }
    if (parameters.getCoeff() != null) {
        params.setCoeff(parameters.getCoeff());
    }
    if (parameters.getEpsilon() != null) {
        params.setEpsilon(parameters.getEpsilon());
    }
    if (parameters.getDegree() != null) {
        params.setDegree(parameters.getDegree());
    }
    MutableDataset<Event> data = TribuoUtil.generateDataset(dataFrame, new AnomalyFactory(), "Anomaly detection LibSVM training data from OpenSearch", TribuoOutputType.ANOMALY_DETECTION_LIBSVM);
    LibSVMAnomalyTrainer trainer = new LibSVMAnomalyTrainer(params);
    LibSVMModel<Event> libSVMModel = trainer.train(data);
    Model model = new Model();
    model.setName(FunctionName.AD_LIBSVM.name());
    model.setVersion(VERSION);
    model.setContent(ModelSerDeSer.serialize(libSVMModel));
    return model;
}
use of org.tribuo.anomaly.Event in project ml-commons by opensearch-project.
the class TribuoUtil method generateDataset.
/**
 * Build a Tribuo {@link MutableDataset} out of the rows of a data frame.
 *
 * <p>Every row becomes one {@link ArrayExample} whose output is a placeholder chosen by
 * {@code outputType}: an unassigned cluster id, a single-dimension NaN regressor, or an
 * EXPECTED anomaly event.
 *
 * @param dataFrame features data
 * @param outputFactory the tribuo output factory
 * @param desc description for tribuo provenance
 * @param outputType the tribuo output type
 * @param <T> the tribuo output class
 * @return tribuo dataset
 * @throws IllegalArgumentException if a row is processed with an unsupported output type
 */
public static <T extends Output<T>> MutableDataset<T> generateDataset(DataFrame dataFrame, OutputFactory<T> outputFactory, String desc, TribuoOutputType outputType) {
    final List<Example<T>> examples = new ArrayList<>();
    final Tuple<String[], double[][]> namesAndValues = transformDataFrame(dataFrame);
    for (int row = 0; row < dataFrame.size(); ++row) {
        // Pick the placeholder output first; the example itself is assembled once below.
        final T placeholder;
        switch (outputType) {
            case CLUSTERID:
                placeholder = (T) new ClusterID(ClusterID.UNASSIGNED);
                break;
            case REGRESSOR:
                // Single dimension tribuo regressor named DIM-0 whose value is double NaN.
                placeholder = (T) new Regressor("DIM-0", Double.NaN);
                break;
            case ANOMALY_DETECTION_LIBSVM:
                // The default event type is EXPECTED (non-anomalous) because:
                // 1. Training: Tribuo LibSVMAnomalyTrainer only supports EXPECTED events at training time.
                // 2. Prediction: rows are treated as non-anomalous by default since Tribuo does not accept the UNKNOWN type.
                // TODO: support anomaly labels to evaluate prediction result
                placeholder = (T) new Event(Event.EventType.EXPECTED);
                break;
            default:
                throw new IllegalArgumentException("unknown type:" + outputType);
        }
        Example<T> rowExample = new ArrayExample<>(placeholder, namesAndValues.v1(), namesAndValues.v2()[row]);
        examples.add(rowExample);
    }
    final SimpleDataSourceProvenance sourceProvenance = new SimpleDataSourceProvenance(desc, outputFactory);
    final ListDataSource<T> source = new ListDataSource<>(examples, outputFactory, sourceProvenance);
    return new MutableDataset<>(source);
}
use of org.tribuo.anomaly.Event in project ml-commons by opensearch-project.
the class AnomalyDetectionLibSVMTest method constructDataFrame.
/**
 * Converts a Tribuo event dataset into a data frame for use in the tests.
 *
 * <p>The column layout is taken from the first example: each of its features becomes a
 * DOUBLE column named after the feature. Every example (including the first) is then
 * handed to {@code addRow} together with its feature values.
 *
 * @param data     source Tribuo dataset
 * @param training forwarded unchanged to {@code addRow}
 * @param labels   forwarded unchanged to {@code addRow}
 * @return the populated data frame, or {@code null} when {@code data} has no examples
 */
private DataFrame constructDataFrame(Dataset<Event> data, boolean training, List<Event.EventType> labels) {
    DataFrame frame = null;
    for (Example<Event> example : data) {
        // Only the first example defines the column metadata.
        final boolean definesColumns = (frame == null);
        List<ColumnMeta> metas = null;
        if (definesColumns) {
            metas = new ArrayList<>();
        }
        List<ColumnValue> rowValues = new ArrayList<>();
        for (Feature feature : example) {
            if (definesColumns) {
                metas.add(new ColumnMeta(feature.getName(), ColumnType.DOUBLE));
            }
            rowValues.add(new DoubleValue(feature.getValue()));
        }
        if (definesColumns) {
            frame = new DefaultDataFrame(metas.toArray(new ColumnMeta[0]));
        }
        addRow(rowValues, training, example, frame, labels);
    }
    return frame;
}
use of org.tribuo.anomaly.Event in project ml-commons by opensearch-project.
the class AnomalyDetectionLibSVM method predict.
/**
 * Scores every row of the data frame with a previously trained LibSVM anomaly model.
 *
 * <p>The model content is deserialized into a Tribuo LibSVM model, the data frame is
 * converted to a Tribuo dataset, and one result row is produced per prediction with the
 * columns {@code score} and {@code anomaly_type}.
 *
 * @param dataFrame rows to score
 * @param model     trained model whose content holds the serialized LibSVM model
 * @return prediction output wrapping a data frame of scores and anomaly types
 * @throws IllegalArgumentException if {@code model} is {@code null}
 */
@Override
public MLOutput predict(DataFrame dataFrame, Model model) {
    if (model == null) {
        throw new IllegalArgumentException("No model found for anomaly detection LibSVM prediction.");
    }
    MutableDataset<Event> predictionDataset = TribuoUtil.generateDataset(dataFrame, new AnomalyFactory(), "Anomaly detection LibSVM prediction data from OpenSearch", TribuoOutputType.ANOMALY_DETECTION_LIBSVM);
    // The serialized form carries no generic information, so this cast cannot be checked.
    @SuppressWarnings("unchecked")
    LibSVMModel<Event> libSVMAnomalyModel = (LibSVMModel<Event>) ModelSerDeSer.deserialize(model.getContent());
    List<Prediction<Event>> predictions = libSVMAnomalyModel.predict(predictionDataset);
    List<Map<String, Object>> adResults = new ArrayList<>(predictions.size());
    for (Prediction<Event> prediction : predictions) {
        Event event = prediction.getOutput();
        Map<String, Object> result = new HashMap<>();
        result.put("score", event.getScore());
        result.put("anomaly_type", event.getType().name());
        adResults.add(result);
    }
    return MLPredictionOutput.builder().predictionResult(DataFrameBuilder.load(adResults)).build();
}
Aggregations