Search in sources :

Example 1 with ClusteringResults

use of org.iobserve.analysis.behavior.karlsruhe.data.ClusteringResults in project iobserve-analysis by research-iobserve.

In the class XMeansClustering, method getClusteringResults:

/**
 * Clusters the given instances with Weka's X-Means and wraps the outcome.
 *
 * <p>
 * The clusterer is configured with this object's distance metric and minimum/maximum cluster
 * counts, and is seeded with a fresh random non-negative integer on every call.
 *
 * @param instances
 *            data to cluster in Weka format
 * @return the clustering results (number of cluster centers, per-instance cluster assignments
 *         and the computed similarity metrics), or an empty Optional if clustering threw an
 *         exception
 */
private Optional<ClusteringResults> getClusteringResults(final Instances instances) {
    final XMeans xMeansClusterer = new XMeans();
    xMeansClusterer.setSeed(new Random().nextInt(Integer.MAX_VALUE));
    xMeansClusterer.setDistanceF(this.distanceMetric);
    xMeansClusterer.setMinNumClusters(this.minClusters);
    xMeansClusterer.setMaxNumClusters(this.maxClusters);
    try {
        xMeansClusterer.buildClusterer(instances);
        // Code used from org.iobserve.analysis.userbehavior.XMeansClustering to use
        // org.iobserve.analysis.userbehavior.ClusteringResults
        final int numberOfInstances = instances.numInstances();
        final int[] assignments = new int[numberOfInstances];
        for (int s = 0; s < numberOfInstances; s++) {
            // Each instance is classified exactly once; the former per-cluster size counter
            // was never read and has been removed.
            assignments[s] = xMeansClusterer.clusterInstance(instances.instance(s));
        }
        final Instances clusterCenters = xMeansClusterer.getClusterCenters();
        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(clusterCenters, instances, assignments);
        clusteringMetrics.calculateSimilarityMetrics();
        return Optional.of(new ClusteringResults("X-Means", clusterCenters.numInstances(), assignments, clusteringMetrics));
    } catch (final Exception e) { // NOPMD NOCS api dependency
        XMeansClustering.LOGGER.error("Clustering failed.", e);
    }
    return Optional.empty();
}
Also used : XMeans(weka.clusterers.XMeans) Random(java.util.Random) ClusteringResults(org.iobserve.analysis.behavior.karlsruhe.data.ClusteringResults) ClusteringMetrics(org.iobserve.analysis.behavior.karlsruhe.data.ClusteringMetrics)

Example 2 with ClusteringResults

use of org.iobserve.analysis.behavior.karlsruhe.data.ClusteringResults in project iobserve-analysis by research-iobserve.

In the class UserGroupExtraction, method extractUserGroups:

/**
 * Extracts user groups from the entry call sequence model.
 *
 * <p>
 * The user sessions are transformed into call counts per distinct operation signature and
 * clustered several times with X-Means. The run with the lowest sum of squared errors is kept
 * in {@code this.clusteringResults}. One entry call sequence model per cluster is then built,
 * its workload intensity is set, and the list is stored in
 * {@code this.entryCallSequenceModelsOfUserGroups}.
 */
public void extractUserGroups() {
    final ClusteringPrePostProcessing clusteringProcessing = new ClusteringPrePostProcessing();
    final XMeansClustering xMeansClustering = new XMeansClustering();
    /*
     * 1. Extraction of distinct system operations. Creates a list of the distinct operation
     * signatures occurring within the entryCallSequenceModel. It is required to transform each
     * user session to counts of its called operations. The counts are used to determine the
     * similarity between the user sessions
     */
    final List<String> listOfDistinctOperationSignatures = clusteringProcessing.getListOfDistinctOperationSignatures(this.entryCallSequenceModel.getUserSessions());
    /*
     * 2. Transformation to the call count model. Transforms the call sequences of the user
     * sessions to a list of counts of calls that state the number of calls of each distinct
     * operation signature for each user session
     */
    final List<UserSessionAsCountsOfCalls> callCountModel = clusteringProcessing.getCallCountModel(this.entryCallSequenceModel.getUserSessions(), listOfDistinctOperationSignatures);
    /*
     * 3. Clustering of user sessions. Clustering of the user sessions whose behavior is
     * represented as counts of their called operation signatures to obtain user groups
     */
    final Instances instances = xMeansClustering.createInstances(callCountModel, listOfDistinctOperationSignatures);
    /*
     * The clustering is performed 5 times and the best result is taken. The quality of a
     * clustering result is determined by the value of the sum of squared error (SSE) of the
     * clustering. The lower the SSE is the better the clustering result. The run index is
     * passed on as the seed argument of the clustering call.
     */
    final int numberOfClusteringRuns = 5;
    for (int run = 0; run < numberOfClusteringRuns; run++) {
        final ClusteringResults candidate = xMeansClustering.clusterSessionsWithXMeans(instances, this.numberOfUserGroupsFromInputUsageModel, this.varianceOfUserGroups, run);
        if (candidate == null) {
            // A run may yield no result (null); skip it instead of dereferencing it below.
            continue;
        }
        if (this.clusteringResults == null || candidate.getClusteringMetrics().getSumOfSquaredErrors() < this.clusteringResults.getClusteringMetrics().getSumOfSquaredErrors()) {
            this.clusteringResults = candidate;
        }
    }
    // NOTE(review): if every run yielded null, this.clusteringResults may still be null here
    // and is passed on unchanged, as before; confirm how the post-processing handles that.
    /*
     * 4. Obtaining the user groups' call sequence models. Creates for each cluster resp. user
     * group its own entry call sequence model that exclusively contains its assigned user
     * sessions
     */
    final List<UserSessionCollectionModel> entryCallSequenceModelsOfXMeansClustering = clusteringProcessing.getForEachUserGroupAnEntryCallSequenceModel(this.clusteringResults, this.entryCallSequenceModel);
    /*
     * 5. Obtaining the user groups' workload intensity. Calculates and sets for each user group
     * its specific workload intensity parameters
     */
    clusteringProcessing.setTheWorkloadIntensityForTheEntryCallSequenceModels(entryCallSequenceModelsOfXMeansClustering, this.isClosedWorkload);
    /*
     * Sets the resulting entryCallSequenceModels that can be retrieved via the getter method
     */
    this.entryCallSequenceModelsOfUserGroups = entryCallSequenceModelsOfXMeansClustering;
}
Also used : Instances(weka.core.Instances) UserSessionCollectionModel(org.iobserve.analysis.data.UserSessionCollectionModel) UserSessionAsCountsOfCalls(org.iobserve.analysis.behavior.karlsruhe.data.UserSessionAsCountsOfCalls) ClusteringResults(org.iobserve.analysis.behavior.karlsruhe.data.ClusteringResults)

Example 3 with ClusteringResults

use of org.iobserve.analysis.behavior.karlsruhe.data.ClusteringResults in project iobserve-analysis by research-iobserve.

In the class XMeansClustering, method clusterSessionsWithXMeans:

/**
 * Clusters the given instances with Weka's X-Means using a Manhattan distance.
 *
 * <p>
 * The permitted number of clusters is the range
 * {@code numberOfUserGroupsFromInputUsageModel ± varianceOfUserGroups}. If the upper bound is
 * below 2, both bounds are set to 1; otherwise the lower bound is raised to at least 2.
 *
 * @param instances
 *            data to cluster in Weka format
 * @param numberOfUserGroupsFromInputUsageModel
 *            is the input number of clusters
 * @param varianceOfUserGroups
 *            enables the creation of a minimum and maximum number of clusters
 * @param seed
 *            seed handed to the clusterer; presumably drives the random choice of the initial
 *            centroids
 * @return the clustering results that contain the number of clusters and the assignments, or
 *         {@code null} if an exception occurred during clustering
 */
public ClusteringResults clusterSessionsWithXMeans(final Instances instances, final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) {
    ClusteringResults xMeansClusteringResults = null;
    try {
        final XMeans xmeans = new XMeans();
        xmeans.setSeed(seed);
        final NormalizableDistance manhattanDistance = new ManhattanDistance();
        manhattanDistance.setDontNormalize(false);
        manhattanDistance.setInstances(instances);
        xmeans.setDistanceF(manhattanDistance);
        // Determines the range of clusters
        // The X-Means clustering algorithm determines the best fitting number of clusters
        // within this range by itself
        int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups;
        int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups;
        if (numberOfClustersMax < 2) {
            numberOfClustersMax = 1;
            numberOfClustersMin = 1;
        } else {
            numberOfClustersMin = Math.max(numberOfClustersMin, 2);
        }
        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);
        xmeans.buildClusterer(instances);
        final int numberOfInstances = instances.numInstances();
        final int[] assignments = new int[numberOfInstances];
        for (int s = 0; s < numberOfInstances; s++) {
            // Each instance is classified exactly once; the former per-cluster size counter
            // was never read and has been removed.
            assignments[s] = xmeans.clusterInstance(instances.instance(s));
        }
        final Instances clusterCenters = xmeans.getClusterCenters();
        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(clusterCenters, instances, assignments);
        clusteringMetrics.calculateSimilarityMetrics();
        xMeansClusteringResults = new ClusteringResults("X-Means", clusterCenters.numInstances(), assignments, clusteringMetrics);
    } catch (final Exception e) { // NOPMD NOCS due to broken xmeans implementation triggering Exception
        // The failure is only reported on stderr; callers receive null.
        e.printStackTrace();
    }
    return xMeansClusteringResults;
}
Also used : XMeans(weka.clusterers.XMeans) ClusteringResults(org.iobserve.analysis.behavior.karlsruhe.data.ClusteringResults) NormalizableDistance(weka.core.NormalizableDistance) ClusteringMetrics(org.iobserve.analysis.behavior.karlsruhe.data.ClusteringMetrics) ManhattanDistance(weka.core.ManhattanDistance)

Aggregations

ClusteringResults (org.iobserve.analysis.behavior.karlsruhe.data.ClusteringResults)3 ClusteringMetrics (org.iobserve.analysis.behavior.karlsruhe.data.ClusteringMetrics)2 XMeans (weka.clusterers.XMeans)2 Random (java.util.Random)1 UserSessionAsCountsOfCalls (org.iobserve.analysis.behavior.karlsruhe.data.UserSessionAsCountsOfCalls)1 UserSessionCollectionModel (org.iobserve.analysis.data.UserSessionCollectionModel)1 Instances (weka.core.Instances)1 ManhattanDistance (weka.core.ManhattanDistance)1 NormalizableDistance (weka.core.NormalizableDistance)1