Search in sources :

Example 11 with SimpleEntry

use of java.util.AbstractMap.SimpleEntry in project aerosolve by airbnb.

the class MinKernelDenseFeatureDictionary method getKNearestNeighbors.

/**
 * Calculates the Min Kernel distance to each dictionary element.
 * Returns the top K elements as a new sparse feature.
 *
 * <p>The dictionary element whose id equals the id of {@code featureVector}
 * itself is skipped, so a vector is never reported as its own neighbor.
 *
 * @param options supplies the number of neighbors to keep, the string-feature
 *                key holding each vector's id, and the output feature key
 * @param featureVector the query vector; must carry an id under the id key
 * @return a feature vector whose float features map the output key to
 *         {@code id -> min kernel value} for the retained neighbors, or an
 *         empty feature vector when the query has no dense features
 */
@Override
public FeatureVector getKNearestNeighbors(KNearestNeighborsOptions options, FeatureVector featureVector) {
    FeatureVector result = new FeatureVector();
    Map<String, List<Double>> denseFeatures = featureVector.getDenseFeatures();
    if (denseFeatures == null) {
        return result;
    }
    int numNearest = options.getNumNearest();
    // One extra slot because an entry is added before the overflow is polled.
    // NOTE(review): presumably EntryComparator orders so that poll() evicts the
    // weakest match — confirm against EntryComparator.
    PriorityQueue<SimpleEntry<String, Double>> pq = new PriorityQueue<>(numNearest + 1, new EntryComparator());
    String idKey = options.getIdKey();
    Map<String, Map<String, Double>> floatFeatures = new HashMap<>();
    String myId = featureVector.getStringFeatures().get(idKey).iterator().next();
    for (FeatureVector supportVector : dictionaryList) {
        Double minKernel = FeatureVectorUtil.featureVectorMinKernel(featureVector, supportVector);
        Set<String> idSet = supportVector.getStringFeatures().get(idKey);
        String id = idSet.iterator().next();
        // Compare ids by value: '==' on Strings tests object identity and would
        // fail to skip the query vector whenever the two ids are distinct objects.
        boolean sameId = (id == null) ? (myId == null) : id.equals(myId);
        if (sameId) {
            continue;
        }
        pq.add(new SimpleEntry<String, Double>(id, minKernel));
        if (pq.size() > numNearest) {
            pq.poll();
        }
    }
    // Drain whatever survived into the sparse output feature.
    Map<String, Double> newFeature = new HashMap<>();
    while (pq.peek() != null) {
        SimpleEntry<String, Double> entry = pq.poll();
        newFeature.put(entry.getKey(), entry.getValue());
    }
    floatFeatures.put(options.getOutputKey(), newFeature);
    result.setFloatFeatures(floatFeatures);
    return result;
}
Also used : FeatureVector(com.airbnb.aerosolve.core.FeatureVector) SimpleEntry(java.util.AbstractMap.SimpleEntry)

Example 12 with SimpleEntry

use of java.util.AbstractMap.SimpleEntry in project stash-codesearch-plugin by palantir.

the class SearchUpdateJobImpl method doUpdate.

/**
 * Brings the search index for this ref up to date with the ref's latest commit.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Diff the last indexed commit against the latest commit and classify
 *       every changed file (added, copied, deleted, modified, renamed).</li>
 *   <li>Try a bulk "add file to ref" update for every new blob; blobs whose
 *       update succeeds need no further work.</li>
 *   <li>For the remaining blobs, read their sizes and contents through
 *       {@code git cat-file}, drop those above the configured size limit, and
 *       queue upserts containing the file contents.</li>
 *   <li>Queue deletions for commits no longer reachable from the ref and
 *       upserts for newly reachable commits.</li>
 *   <li>Flush the queued requests and record the new latest indexed hash.</li>
 * </ol>
 *
 * <p>The method returns without recording the new hash when indexing is
 * disabled, when the latest hash cannot be determined, or when the diff, the
 * file indexing, or either commit scan throws.
 *
 * @param client search client used to build and execute index requests
 * @param gitScm source of the git command builder factory
 * @param globalSettings supplies the indexing-enabled flag and max file size
 */
@Override
public void doUpdate(Client client, GitScm gitScm, GlobalSettings globalSettings) {
    if (!globalSettings.getIndexingEnabled()) {
        return;
    }
    GitCommandBuilderFactory builderFactory = gitScm.getCommandBuilderFactory();
    // List of bulk requests to execute sequentially at the end of the method
    RequestBuffer requestBuffer = new RequestBuffer(client);
    // Unique identifier for ref
    String refDesc = toString();
    // Hash of latest indexed commit
    String prevHash = getLatestIndexedHash(client);
    // Hash of latest commit on ref
    String newHash = getLatestHash(builderFactory);
    if (newHash == null) {
        log.error("Aborting since hash is invalid");
        return;
    }
    // Diff for files & process changes
    Set<SimpleEntry<String, String>> filesToAdd = new LinkedHashSet<SimpleEntry<String, String>>();
    try {
        // Get diff --raw -z tokens. With -z every status record and every path is
        // terminated by a NUL byte, so NUL is the token delimiter.
        String[] diffToks = builderFactory.builder(repository).command("diff").argument("--raw").argument("--abbrev=40").argument("-z").argument(prevHash).argument(newHash).build(new StringOutputHandler(plf)).call().split("\u0000");
        // Process each diff --raw -z entry
        for (int curTok = 0; curTok < diffToks.length; ++curTok) {
            // Status record fields: old mode, new mode, old blob, new blob, status
            String[] statusToks = diffToks[curTok].split(" ");
            if (statusToks.length < 5) {
                break;
            }
            String status = statusToks[4];
            String oldBlob = statusToks[2];
            String newBlob = statusToks[3];
            // TODO: so many warnings!  Generics, CAEN I HAZ THEM?
            if (status.startsWith("A")) {
                // File added
                String path = diffToks[++curTok];
                filesToAdd.add(new SimpleEntry<String, String>(newBlob, path));
            } else if (status.startsWith("C")) {
                // File copied: skip the source path, keep the destination path
                String toPath = diffToks[curTok += 2];
                filesToAdd.add(new SimpleEntry<String, String>(newBlob, toPath));
            } else if (status.startsWith("D")) {
                // File deleted
                String path = diffToks[++curTok];
                requestBuffer.add(buildDeleteFileFromRef(client, oldBlob, path));
            } else if (status.startsWith("M") || status.startsWith("T")) {
                // File modified
                String path = diffToks[++curTok];
                if (!oldBlob.equals(newBlob)) {
                    requestBuffer.add(buildDeleteFileFromRef(client, oldBlob, path));
                    filesToAdd.add(new SimpleEntry<String, String>(newBlob, path));
                }
            } else if (status.startsWith("R")) {
                // File renamed
                String fromPath = diffToks[++curTok];
                String toPath = diffToks[++curTok];
                requestBuffer.add(buildDeleteFileFromRef(client, oldBlob, fromPath));
                filesToAdd.add(new SimpleEntry<String, String>(newBlob, toPath));
            } else if (status.startsWith("X")) {
                // Unknown change
                throw new RuntimeException("Status letter 'X' is a git bug.");
            }
        }
    } catch (Exception e) {
        log.error("Caught error while diffing between {} and {}, aborting update", prevHash, newHash, e);
        return;
    }
    log.debug("{} update: adding {} files", refDesc, filesToAdd.size());
    // First attempt: simply add the ref to the refs array.
    if (!filesToAdd.isEmpty()) {
        try {
            BulkRequestBuilder bulkFileRefUpdate = client.prepareBulk();
            // Iterate over a snapshot so entries can be removed from filesToAdd below
            ImmutableList<SimpleEntry<String, String>> filesToAddCopy = ImmutableList.copyOf(filesToAdd);
            for (SimpleEntry<String, String> bppair : filesToAddCopy) {
                String blob = bppair.getKey(), path = bppair.getValue();
                bulkFileRefUpdate.add(buildAddFileToRef(client, blob, path));
            }
            BulkItemResponse[] responses = bulkFileRefUpdate.get().getItems();
            if (responses.length != filesToAddCopy.size()) {
                throw new IndexOutOfBoundsException("Bulk resp. array must have the same length as original request array");
            }
            // Process all update responses
            int count = 0;
            for (SimpleEntry<String, String> bppair : filesToAddCopy) {
                if (!responses[count].isFailed()) {
                    // Update was successful, no need to index file
                    filesToAdd.remove(bppair);
                }
                ++count;
            }
        } catch (Exception e) {
            // Best effort: on failure every file stays in filesToAdd and is upserted below
            log.warn("file-ref update failed, performing upserts for all changes", e);
        }
    }
    log.debug("{} update: {} files to upsert", refDesc, filesToAdd.size());
    // Process all changes w/o corresponding documents
    if (!filesToAdd.isEmpty()) {
        try {
            // Get filesizes and prune all files that exceed the filesize limit
            ImmutableList<SimpleEntry<String, String>> filesToAddCopy = ImmutableList.copyOf(filesToAdd);
            CatFileInputHandler catFileInput = new CatFileInputHandler();
            for (SimpleEntry<String, String> bppair : filesToAddCopy) {
                catFileInput.addObject(bppair.getKey());
            }
            String[] catFileMetadata = builderFactory.builder(repository).command("cat-file").argument("--batch-check").inputHandler(catFileInput).build(new StringOutputHandler(plf)).call().split("\n");
            if (filesToAdd.size() != catFileMetadata.length) {
                throw new IndexOutOfBoundsException("git cat-file --batch-check returned wrong number of lines");
            }
            CatFileOutputHandler catFileOutput = new CatFileOutputHandler(plf);
            int count = 0;
            int maxFileSize = globalSettings.getMaxFileSize();
            for (SimpleEntry<String, String> bppair : filesToAddCopy) {
                int fs;
                try {
                    // Third whitespace-separated field of a --batch-check line is the size
                    fs = Integer.parseInt(catFileMetadata[count].split("\\s")[2]);
                } catch (Exception e) {
                    // Unparseable size: treat the file as too large so it is pruned
                    fs = Integer.MAX_VALUE;
                }
                if (fs > maxFileSize) {
                    filesToAdd.remove(bppair);
                } else {
                    catFileOutput.addFile(fs);
                }
                ++count;
            }
            // Generate new cat-file input and retrieve file contents
            catFileInput = new CatFileInputHandler();
            for (SimpleEntry<String, String> bppair : filesToAdd) {
                catFileInput.addObject(bppair.getKey());
            }
            String[] fileContents = builderFactory.builder(repository).command("cat-file").argument("--batch=").inputHandler(catFileInput).build(catFileOutput).call();
            if (filesToAdd.size() != fileContents.length) {
                throw new IndexOutOfBoundsException("git cat-file --batch= returned wrong number of files");
            }
            count = 0;
            for (SimpleEntry<String, String> bppair : filesToAdd) {
                String blob = bppair.getKey(), path = bppair.getValue();
                String fileContent = fileContents[count];
                if (fileContent != null) {
                    requestBuffer.add(buildAddFileToRef(client, blob, path).setUpsert(jsonBuilder().startObject().field("project", repository.getProject().getKey()).field("repository", repository.getSlug()).field("blob", blob).field("path", path).field("extension", FilenameUtils.getExtension(path).toLowerCase()).field("contents", fileContent).field("charcount", fileContent.length()).field("linecount", countLines(fileContent)).startArray("refs").value(ref).endArray().endObject()));
                }
                ++count;
            }
        } catch (Exception e) {
            log.error("Caught error during new file indexing, aborting update", e);
            return;
        }
    }
    // Clear memory
    filesToAdd = null;
    // Get deleted commits
    String[] deletedCommits;
    try {
        deletedCommits = builderFactory.builder(repository).command("rev-list").argument(prevHash).argument("^" + newHash).build(new StringOutputHandler(plf)).call().split("\n+");
    } catch (Exception e) {
        log.error("Caught error while scanning for deleted commits, aborting update", e);
        return;
    }
    // Remove deleted commits from ES index
    int commitsDeleted = 0;
    for (String hash : deletedCommits) {
        if (hash.length() != 40) {
            continue;
        }
        requestBuffer.add(buildDeleteCommitFromRef(client, hash));
        ++commitsDeleted;
    }
    // Get new commits. The format ends every record with byte 0x03 and separates
    // its fields with byte 0x02, so records are split on 0x03 here.
    String[] newCommits;
    try {
        newCommits = builderFactory.builder(repository).command("log").argument("--format=%H%x02%ct%x02%an%x02%ae%x02%s%x02%b%x03").argument(newHash).argument("^" + prevHash).build(new StringOutputHandler(plf)).call().split("\u0003");
    } catch (Exception e) {
        log.error("Caught error while scanning for new commits, aborting update", e);
        return;
    }
    // Add new commits to ES index
    int commitsAdded = 0;
    for (String line : newCommits) {
        try {
            // Parse each commit "line" (not really lines, since they're delimited by byte 0x03)
            if (line.length() <= 40) {
                continue;
            }
            // git puts a newline between records; drop it from the front of the next record
            if (line.charAt(0) == '\n') {
                line = line.substring(1);
            }
            // Fields are separated by byte 0x02; limit 6 keeps any 0x02 inside the body intact
            String[] commitToks = line.split("\u0002", 6);
            String hash = commitToks[0];
            // %ct is in seconds; Date expects milliseconds
            long timestamp = Long.parseLong(commitToks[1]) * 1000;
            String authorName = commitToks[2];
            String authorEmail = commitToks[3];
            String subject = commitToks[4];
            // bodies are optional, so this might not be present
            String body = commitToks.length < 6 ? "" : commitToks[5];
            if (hash.length() != 40) {
                continue;
            }
            // Add commit to request
            requestBuffer.add(buildAddCommitToRef(client, hash).setUpsert(jsonBuilder().startObject().field("project", repository.getProject().getKey()).field("repository", repository.getSlug()).field("hash", hash).field("commitdate", new Date(timestamp)).field("authorname", authorName).field("authoremail", authorEmail).field("subject", subject).field("body", body).startArray("refs").value(ref).endArray().endObject()));
            ++commitsAdded;
        } catch (Exception e) {
            log.warn("Caught error while constructing request object, skipping update", e);
            continue;
        }
    }
    log.debug("{} update: adding {} commits, deleting {} commits", refDesc, commitsAdded, commitsDeleted);
    // Write remaining requests and wait for completion
    requestBuffer.flush();
    // Update latest indexed note
    addLatestIndexedNote(client, newHash);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) RequestBuffer(com.palantir.stash.codesearch.elasticsearch.RequestBuffer) SimpleEntry(java.util.AbstractMap.SimpleEntry) BulkItemResponse(org.elasticsearch.action.bulk.BulkItemResponse) Date(java.util.Date) GitCommandBuilderFactory(com.atlassian.stash.scm.git.GitCommandBuilderFactory) BulkRequestBuilder(org.elasticsearch.action.bulk.BulkRequestBuilder)

Example 13 with SimpleEntry

use of java.util.AbstractMap.SimpleEntry in project hazelcast by hazelcast.

the class ClientReplicatedMapTest method testSize.

/**
 * Fills the "default" replicated map with the generated test entries, writing
 * the first half through the instance created from {@code config} and the
 * second half through the client instance, then asserts that both views
 * report a size equal to the number of generated entries.
 *
 * @param config configuration passed to the instance factory
 * @throws Exception declared for the caller; nothing here throws it explicitly
 */
private void testSize(Config config) throws Exception {
    HazelcastInstance memberInstance = hazelcastFactory.newHazelcastInstance(config);
    HazelcastInstance clientInstance = hazelcastFactory.newHazelcastClient();
    final ReplicatedMap<Integer, Integer> memberMap = memberInstance.getReplicatedMap("default");
    final ReplicatedMap<Integer, Integer> clientMap = clientInstance.getReplicatedMap("default");
    final SimpleEntry<Integer, Integer>[] entries = buildTestValues();
    final int total = entries.length;
    final int midpoint = total / 2;
    // First half goes in through the member-side map ...
    for (int index = 0; index < midpoint; index++) {
        memberMap.put(entries[index].getKey(), entries[index].getValue());
    }
    // ... and the remainder through the client-side map.
    for (int index = midpoint; index < total; index++) {
        clientMap.put(entries[index].getKey(), entries[index].getValue());
    }
    assertEquals(total, memberMap.size());
    assertEquals(total, clientMap.size());
}
Also used : HazelcastInstance(com.hazelcast.core.HazelcastInstance) SimpleEntry(java.util.AbstractMap.SimpleEntry)

Example 14 with SimpleEntry

use of java.util.AbstractMap.SimpleEntry in project hazelcast by hazelcast.

the class ClientReplicatedMapTest method testValues.

/**
 * Fills the "default" replicated map with the generated test entries, writing
 * the first half through the instance created from {@code config} and the
 * second half through the client instance, then asserts that the value
 * collection seen from each side contains every generated value.
 *
 * @param config configuration passed to the instance factory
 * @throws Exception declared for the caller; nothing here throws it explicitly
 */
private void testValues(Config config) throws Exception {
    HazelcastInstance memberInstance = hazelcastFactory.newHazelcastInstance(config);
    HazelcastInstance clientInstance = hazelcastFactory.newHazelcastClient();
    final ReplicatedMap<Integer, Integer> memberMap = memberInstance.getReplicatedMap("default");
    final ReplicatedMap<Integer, Integer> clientMap = clientInstance.getReplicatedMap("default");
    final SimpleEntry<Integer, Integer>[] entries = buildTestValues();
    final int midpoint = entries.length / 2;
    // First half goes in through the member-side map ...
    for (int index = 0; index < midpoint; index++) {
        memberMap.put(entries[index].getKey(), entries[index].getValue());
    }
    // ... and the remainder through the client-side map.
    for (int index = midpoint; index < entries.length; index++) {
        clientMap.put(entries[index].getKey(), entries[index].getValue());
    }
    // Snapshot both value views as sets before checking membership.
    Set<Integer> memberValues = new HashSet<Integer>(memberMap.values());
    Set<Integer> clientValues = new HashSet<Integer>(clientMap.values());
    for (int index = 0; index < entries.length; index++) {
        SimpleEntry<Integer, Integer> expected = entries[index];
        assertContains(memberValues, expected.getValue());
        assertContains(clientValues, expected.getValue());
    }
}
Also used : HazelcastInstance(com.hazelcast.core.HazelcastInstance) SimpleEntry(java.util.AbstractMap.SimpleEntry) HashSet(java.util.HashSet)

Example 15 with SimpleEntry

use of java.util.AbstractMap.SimpleEntry in project hazelcast by hazelcast.

the class ClientReplicatedMapTest method buildTestValues.

/**
 * Creates 100 entries whose keys and values are random integers.
 *
 * @return a fully populated array of 100 entries
 */
@SuppressWarnings("unchecked")
private SimpleEntry<Integer, Integer>[] buildTestValues() {
    final Random generator = new Random();
    // A generic array cannot be created directly, hence the raw array and the suppression.
    final SimpleEntry<Integer, Integer>[] entries = new SimpleEntry[100];
    int slot = 0;
    while (slot < entries.length) {
        // Draw the key before the value, one after the other.
        int key = generator.nextInt();
        int value = generator.nextInt();
        entries[slot] = new SimpleEntry<Integer, Integer>(key, value);
        slot++;
    }
    return entries;
}
Also used : Random(java.util.Random) SimpleEntry(java.util.AbstractMap.SimpleEntry)

Aggregations

SimpleEntry (java.util.AbstractMap.SimpleEntry)47 Test (org.junit.Test)24 ArrayList (java.util.ArrayList)21 HashMap (java.util.HashMap)15 CucumberFeature (cucumber.runtime.model.CucumberFeature)10 Result (gherkin.formatter.model.Result)10 Entry (java.util.Map.Entry)10 HashSet (java.util.HashSet)9 Map (java.util.Map)8 Metacard (ddf.catalog.data.Metacard)7 Serializable (java.io.Serializable)7 HazelcastInstance (com.hazelcast.core.HazelcastInstance)6 List (java.util.List)6 UpdateRequest (ddf.catalog.operation.UpdateRequest)5 Set (java.util.Set)5 Configuration (org.apache.commons.configuration.Configuration)5 ZookeeperConfigurationProvider (com.kixeye.chassis.bootstrap.configuration.zookeeper.ZookeeperConfigurationProvider)4 LinkedList (java.util.LinkedList)4 DetailAST (com.puppycrawl.tools.checkstyle.api.DetailAST)3 MetacardImpl (ddf.catalog.data.impl.MetacardImpl)3