use of org.iobserve.analysis.userbehavior.data.ClusteringResults in project iobserve-analysis by research-iobserve.
the class XMeansClustering method getClusteringResults.
/**
 * Clusters the given instances with X-Means and wraps the outcome in a
 * {@link ClusteringResults} object.
 *
 * The clusterer is configured with this object's distance metric and its minimum and
 * maximum number of clusters. The seed is drawn randomly on every invocation.
 *
 * @param instances
 *            data to cluster in Weka format
 * @return the clustering results (number of clusters, assignments and metrics), or an empty
 *         Optional if the clustering raised an exception
 */
private Optional<ClusteringResults> getClusteringResults(final Instances instances) {
    final XMeans xMeansClusterer = new XMeans();
    xMeansClusterer.setSeed(new Random().nextInt(Integer.MAX_VALUE));
    xMeansClusterer.setDistanceF(this.distanceMetric);
    xMeansClusterer.setMinNumClusters(this.minClusters);
    xMeansClusterer.setMaxNumClusters(this.maxClusters);

    try {
        xMeansClusterer.buildClusterer(instances);

        /*
         * Code used from org.iobserve.analysis.userbehavior.XMeansClustering to use
         * org.iobserve.analysis.userbehavior.ClusteringResults
         */
        final Instances clusterCenters = xMeansClusterer.getClusterCenters();
        final int numberOfInstances = instances.numInstances();

        // assignments[s] holds the index of the cluster that instance s was assigned to
        final int[] assignments = new int[numberOfInstances];
        for (int s = 0; s < numberOfInstances; s++) {
            assignments[s] = xMeansClusterer.clusterInstance(instances.instance(s));
        }

        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(clusterCenters, instances, assignments);
        clusteringMetrics.calculateSimilarityMetrics();

        return Optional.of(new ClusteringResults("X-Means", clusterCenters.numInstances(), assignments, clusteringMetrics));
    } catch (final Exception e) { // NOPMD NOCS api dependency
        XMeansClustering.LOGGER.error("Clustering failed.", e);
    }
    return Optional.empty();
}
use of org.iobserve.analysis.userbehavior.data.ClusteringResults in project iobserve-analysis by research-iobserve.
the class XMeansClustering method clusterSessionsWithXMeans.
/**
 * Clusters the given instances with X-Means, using a normalizing Manhattan distance.
 *
 * The allowed number of clusters ranges from
 * {@code numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups} to
 * {@code numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups}. If the upper bound
 * is below 2, exactly one cluster is requested; otherwise the lower bound is raised to at
 * least 2.
 *
 * @param instances
 *            data to cluster in Weka format
 * @param numberOfUserGroupsFromInputUsageModel
 *            is the input number of clusters
 * @param varianceOfUserGroups
 *            enables the creation of a minimum and maximum number of clusters
 * @param seed
 *            seed passed to the X-Means clusterer
 * @return the clustering results that contain the number of cluster and the assignments, or
 *         {@code null} if the clustering raised an exception
 */
public ClusteringResults clusterSessionsWithXMeans(final Instances instances, final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) {
    ClusteringResults xMeansClusteringResults = null;

    try {
        final XMeans xmeans = new XMeans();
        xmeans.setSeed(seed);

        final NormalizableDistance manhattanDistance = new ManhattanDistance();
        manhattanDistance.setDontNormalize(false);
        manhattanDistance.setInstances(instances);
        xmeans.setDistanceF(manhattanDistance);

        final int[] assignments = new int[instances.numInstances()];

        // Determines the range of clusters
        // The X-Means clustering algorithm determines the best fitting number of clusters
        // within this range by itself
        int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups;
        int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups;
        if (numberOfClustersMax < 2) {
            numberOfClustersMax = 1;
            numberOfClustersMin = 1;
        } else if (numberOfClustersMin < 2) {
            numberOfClustersMin = 2;
        }

        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);
        xmeans.buildClusterer(instances);

        // assignments[s] holds the index of the cluster that instance s was assigned to
        for (int s = 0; s < instances.numInstances(); s++) {
            assignments[s] = xmeans.clusterInstance(instances.instance(s));
        }

        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(xmeans.getClusterCenters(), instances, assignments);
        clusteringMetrics.calculateSimilarityMetrics();

        xMeansClusteringResults = new ClusteringResults("X-Means", xmeans.getClusterCenters().numInstances(), assignments, clusteringMetrics);
    } catch (final Exception e) { // NOPMD NOCS due to broken xmeans implementation triggering Exception
        // NOTE(review): the failure is only printed and the caller receives null;
        // consider routing this through the project's logger.
        e.printStackTrace();
    }

    return xMeansClusteringResults;
}
use of org.iobserve.analysis.userbehavior.data.ClusteringResults in project iobserve-analysis by research-iobserve.
the class UserGroupExtraction method extractUserGroups.
/**
 * Function to extract user groups.
 *
 * Transforms the user sessions of the entry call sequence model into call counts, clusters
 * them several times with X-Means, keeps the clustering with the lowest sum of squared
 * errors and derives one entry call sequence model per resulting user group. The outcome is
 * stored in {@code entryCallSequenceModelsOfUserGroups}.
 */
public void extractUserGroups() {
    final ClusteringPrePostProcessing clusteringProcessing = new ClusteringPrePostProcessing();
    final XMeansClustering xMeansClustering = new XMeansClustering();

    /*
     * 1. Extraction of distinct system operations. Creates a list of the distinct operation
     * signatures occurring within the entryCallSequenceModel. It is required to transform each
     * user session to counts of its called operations. The counts are used to determine the
     * similarity between the user sessions
     */
    final List<String> listOfDistinctOperationSignatures = clusteringProcessing.getListOfDistinctOperationSignatures(this.entryCallSequenceModel.getUserSessions());

    /*
     * 2. Transformation to the call count model. Transforms the call sequences of the user
     * sessions to a list of counts of calls that state the number of calls of each distinct
     * operation signature for each user session
     */
    final List<UserSessionAsCountsOfCalls> callCountModel = clusteringProcessing.getCallCountModel(this.entryCallSequenceModel.getUserSessions(), listOfDistinctOperationSignatures);

    /*
     * 3. Clustering of user sessions. Clustering of the user sessions whose behavior is
     * represented as counts of their called operation signatures to obtain user groups
     */
    final Instances instances = xMeansClustering.createInstances(callCountModel, listOfDistinctOperationSignatures);

    /*
     * The clustering is performed 5 times and the best result is taken. The quality of a
     * clustering result is determined by the value of the sum of squared error (SSE) of the
     * clustering. The lower the SSE is the better the clustering result. The run index is
     * used as the seed of the respective run.
     */
    final int numberOfClusteringRuns = 5;
    for (int seed = 0; seed < numberOfClusteringRuns; seed++) {
        final ClusteringResults candidate = xMeansClustering.clusterSessionsWithXMeans(instances, this.numberOfUserGroupsFromInputUsageModel, this.varianceOfUserGroups, seed);
        if (candidate == null) {
            // A failed run yields no result; skip it instead of dereferencing null
            // and losing a valid result found by another run.
            continue;
        }
        if (this.clusteringResults == null || candidate.getClusteringMetrics().getSumOfSquaredErrors() < this.clusteringResults.getClusteringMetrics().getSumOfSquaredErrors()) {
            this.clusteringResults = candidate;
        }
    }

    /*
     * 4. Obtaining the user groups' call sequence models. Creates for each cluster resp. user
     * group its own entry call sequence model that exclusively contains its assigned user
     * sessions
     */
    // NOTE(review): this.clusteringResults is still null here if every run failed — confirm
    // how getForEachUserGroupAnEntryCallSequenceModel handles that.
    final List<EntryCallSequenceModel> entryCallSequenceModelsOfXMeansClustering = clusteringProcessing.getForEachUserGroupAnEntryCallSequenceModel(this.clusteringResults, this.entryCallSequenceModel);

    /*
     * 5. Obtaining the user groups' workload intensity. Calculates and sets for each user group
     * its specific workload intensity parameters
     */
    clusteringProcessing.setTheWorkloadIntensityForTheEntryCallSequenceModels(entryCallSequenceModelsOfXMeansClustering, this.isClosedWorkload);

    /*
     * Sets the resulting entryCallSequenceModels that can be retrieved via the getter method
     */
    this.entryCallSequenceModelsOfUserGroups = entryCallSequenceModelsOfXMeansClustering;
}
Aggregations