use of java.util.concurrent.atomic.AtomicInteger in project deeplearning4j by deeplearning4j.
the class WordVectorSerializer method readWord2VecModel.
/**
* This method
* 1) Binary model, either compressed or not. Like well-known Google Model
* 2) Popular CSV word2vec text format
* 3) DL4j compressed format
*
* Please note: if extended data isn't available, only weights will be loaded instead.
*
* @param file
* @param extendedModel if TRUE, we'll try to load HS states & Huffman tree info, if FALSE, only weights will be loaded
* @return
*/
public static Word2Vec readWord2VecModel(@NonNull File file, boolean extendedModel) {
InMemoryLookupTable<VocabWord> lookupTable = new InMemoryLookupTable<>();
AbstractCache<VocabWord> vocabCache = new AbstractCache<>();
Word2Vec vec;
INDArray syn0 = null;
VectorsConfiguration configuration = new VectorsConfiguration();
if (!file.exists() || !file.isFile())
throw new ND4JIllegalStateException("File [" + file.getAbsolutePath() + "] doesn't exist");
int originalFreq = Nd4j.getMemoryManager().getOccasionalGcFrequency();
boolean originalPeriodic = Nd4j.getMemoryManager().isPeriodicGcActive();
if (originalPeriodic)
Nd4j.getMemoryManager().togglePeriodicGc(false);
Nd4j.getMemoryManager().setOccasionalGcFrequency(50000);
// try to load zip format
try {
if (extendedModel) {
log.debug("Trying full model restoration...");
if (originalPeriodic)
Nd4j.getMemoryManager().togglePeriodicGc(true);
Nd4j.getMemoryManager().setOccasionalGcFrequency(originalFreq);
return readWord2Vec(file);
} else {
log.debug("Trying simplified model restoration...");
File tmpFileSyn0 = File.createTempFile("word2vec", "syn");
File tmpFileConfig = File.createTempFile("word2vec", "config");
// we don't need full model, so we go directly to syn0 file
ZipFile zipFile = new ZipFile(file);
ZipEntry syn = zipFile.getEntry("syn0.txt");
InputStream stream = zipFile.getInputStream(syn);
Files.copy(stream, Paths.get(tmpFileSyn0.getAbsolutePath()), StandardCopyOption.REPLACE_EXISTING);
// now we're restoring configuration saved earlier
ZipEntry config = zipFile.getEntry("config.json");
if (config != null) {
stream = zipFile.getInputStream(config);
StringBuilder builder = new StringBuilder();
try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
String line;
while ((line = reader.readLine()) != null) {
builder.append(line);
}
}
configuration = VectorsConfiguration.fromJson(builder.toString().trim());
}
ZipEntry ve = zipFile.getEntry("frequencies.txt");
if (ve != null) {
stream = zipFile.getInputStream(ve);
AtomicInteger cnt = new AtomicInteger(0);
try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
String line;
while ((line = reader.readLine()) != null) {
String[] split = line.split(" ");
VocabWord word = new VocabWord(Double.valueOf(split[1]), decodeB64(split[0]));
word.setIndex(cnt.getAndIncrement());
word.incrementSequencesCount(Long.valueOf(split[2]));
vocabCache.addToken(word);
vocabCache.addWordToIndex(word.getIndex(), word.getLabel());
Nd4j.getMemoryManager().invokeGcOccasionally();
}
}
}
List<INDArray> rows = new ArrayList<>();
// basically read up everything, call vstacl and then return model
try (Reader reader = new CSVReader(tmpFileSyn0)) {
AtomicInteger cnt = new AtomicInteger(0);
while (reader.hasNext()) {
Pair<VocabWord, float[]> pair = reader.next();
VocabWord word = pair.getFirst();
INDArray vector = Nd4j.create(pair.getSecond());
if (ve != null) {
if (syn0 == null)
syn0 = Nd4j.create(vocabCache.numWords(), vector.length());
syn0.getRow(cnt.getAndIncrement()).assign(vector);
} else {
rows.add(vector);
vocabCache.addToken(word);
vocabCache.addWordToIndex(word.getIndex(), word.getLabel());
}
Nd4j.getMemoryManager().invokeGcOccasionally();
}
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
if (originalPeriodic)
Nd4j.getMemoryManager().togglePeriodicGc(true);
Nd4j.getMemoryManager().setOccasionalGcFrequency(originalFreq);
}
if (syn0 == null && vocabCache.numWords() > 0)
syn0 = Nd4j.vstack(rows);
if (syn0 == null) {
log.error("Can't build syn0 table");
throw new DL4JInvalidInputException("Can't build syn0 table");
}
lookupTable = new InMemoryLookupTable.Builder<VocabWord>().cache(vocabCache).vectorLength(syn0.columns()).useHierarchicSoftmax(false).useAdaGrad(false).build();
lookupTable.setSyn0(syn0);
try {
tmpFileSyn0.delete();
tmpFileConfig.delete();
} catch (Exception e) {
//
}
}
} catch (Exception e) {
// let's try to load this file as csv file
try {
log.debug("Trying CSV model restoration...");
Pair<InMemoryLookupTable, VocabCache> pair = loadTxt(file);
lookupTable = pair.getFirst();
vocabCache = (AbstractCache<VocabWord>) pair.getSecond();
} catch (Exception ex) {
// we fallback to trying binary model instead
try {
log.debug("Trying binary model restoration...");
if (originalPeriodic)
Nd4j.getMemoryManager().togglePeriodicGc(true);
Nd4j.getMemoryManager().setOccasionalGcFrequency(originalFreq);
vec = loadGoogleModel(file, true, true);
return vec;
} catch (Exception ey) {
// try to load without linebreaks
try {
if (originalPeriodic)
Nd4j.getMemoryManager().togglePeriodicGc(true);
Nd4j.getMemoryManager().setOccasionalGcFrequency(originalFreq);
vec = loadGoogleModel(file, true, false);
return vec;
} catch (Exception ez) {
throw new RuntimeException("Unable to guess input file format. Please use corresponding loader directly");
}
}
}
}
Word2Vec.Builder builder = new Word2Vec.Builder(configuration).lookupTable(lookupTable).useAdaGrad(false).vocabCache(vocabCache).layerSize(lookupTable.layerSize()).useHierarchicSoftmax(false).resetModel(false);
/*
Trying to restore TokenizerFactory & TokenPreProcessor
*/
TokenizerFactory factory = getTokenizerFactory(configuration);
if (factory != null)
builder.tokenizerFactory(factory);
vec = builder.build();
return vec;
}
use of java.util.concurrent.atomic.AtomicInteger in project elasticsearch by elastic.
the class VerifyNodeRepositoryAction method verify.
public void verify(String repository, String verificationToken, final ActionListener<VerifyResponse> listener) {
final DiscoveryNodes discoNodes = clusterService.state().nodes();
final DiscoveryNode localNode = discoNodes.getLocalNode();
final ObjectContainer<DiscoveryNode> masterAndDataNodes = discoNodes.getMasterAndDataNodes().values();
final List<DiscoveryNode> nodes = new ArrayList<>();
for (ObjectCursor<DiscoveryNode> cursor : masterAndDataNodes) {
DiscoveryNode node = cursor.value;
nodes.add(node);
}
final CopyOnWriteArrayList<VerificationFailure> errors = new CopyOnWriteArrayList<>();
final AtomicInteger counter = new AtomicInteger(nodes.size());
for (final DiscoveryNode node : nodes) {
if (node.equals(localNode)) {
try {
doVerify(repository, verificationToken, localNode);
} catch (Exception e) {
logger.warn((Supplier<?>) () -> new ParameterizedMessage("[{}] failed to verify repository", repository), e);
errors.add(new VerificationFailure(node.getId(), e));
}
if (counter.decrementAndGet() == 0) {
finishVerification(listener, nodes, errors);
}
} else {
transportService.sendRequest(node, ACTION_NAME, new VerifyNodeRepositoryRequest(repository, verificationToken), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
@Override
public void handleResponse(TransportResponse.Empty response) {
if (counter.decrementAndGet() == 0) {
finishVerification(listener, nodes, errors);
}
}
@Override
public void handleException(TransportException exp) {
errors.add(new VerificationFailure(node.getId(), exp));
if (counter.decrementAndGet() == 0) {
finishVerification(listener, nodes, errors);
}
}
});
}
}
}
use of java.util.concurrent.atomic.AtomicInteger in project elasticsearch by elastic.
the class CreateIndexIT method testCreateAndDeleteIndexConcurrently.
public void testCreateAndDeleteIndexConcurrently() throws InterruptedException {
createIndex("test");
final AtomicInteger indexVersion = new AtomicInteger(0);
final Object indexVersionLock = new Object();
final CountDownLatch latch = new CountDownLatch(1);
int numDocs = randomIntBetween(1, 10);
for (int i = 0; i < numDocs; i++) {
client().prepareIndex("test", "test").setSource("index_version", indexVersion.get()).get();
}
synchronized (indexVersionLock) {
// not necessarily needed here but for completeness we lock here too
indexVersion.incrementAndGet();
}
client().admin().indices().prepareDelete("test").execute(new // this happens async!!!
ActionListener<DeleteIndexResponse>() {
@Override
public void onResponse(DeleteIndexResponse deleteIndexResponse) {
Thread thread = new Thread() {
@Override
public void run() {
try {
// recreate that index
client().prepareIndex("test", "test").setSource("index_version", indexVersion.get()).get();
synchronized (indexVersionLock) {
// we sync here since we have to ensure that all indexing operations below for a given ID are done before
// we increment the index version otherwise a doc that is in-flight could make it into an index that it
// was supposed to be deleted for and our assertion fail...
indexVersion.incrementAndGet();
}
// from here on all docs with index_version == 0|1 must be gone!!!! only 2 are ok;
assertAcked(client().admin().indices().prepareDelete("test").get());
} finally {
latch.countDown();
}
}
};
thread.start();
}
@Override
public void onFailure(Exception e) {
throw new RuntimeException(e);
}
});
numDocs = randomIntBetween(100, 200);
for (int i = 0; i < numDocs; i++) {
try {
synchronized (indexVersionLock) {
client().prepareIndex("test", "test").setSource("index_version", indexVersion.get()).setTimeout(TimeValue.timeValueSeconds(10)).get();
}
} catch (IndexNotFoundException inf) {
// fine
} catch (UnavailableShardsException ex) {
assertEquals(ex.getCause().getClass(), IndexNotFoundException.class);
// fine we run into a delete index while retrying
}
}
latch.await();
refresh();
// we only really assert that we never reuse segments of old indices or anything like this here and that nothing fails with
// crazy exceptions
SearchResponse expected = client().prepareSearch("test").setIndicesOptions(IndicesOptions.lenientExpandOpen()).setQuery(new RangeQueryBuilder("index_version").from(indexVersion.get(), true)).get();
SearchResponse all = client().prepareSearch("test").setIndicesOptions(IndicesOptions.lenientExpandOpen()).get();
assertEquals(expected + " vs. " + all, expected.getHits().getTotalHits(), all.getHits().getTotalHits());
logger.info("total: {}", expected.getHits().getTotalHits());
}
use of java.util.concurrent.atomic.AtomicInteger in project elasticsearch by elastic.
the class TransportActionFilterChainTests method testTooManyContinueProcessingRequest.
public void testTooManyContinueProcessingRequest() throws ExecutionException, InterruptedException {
final int additionalContinueCount = randomInt(10);
RequestTestFilter testFilter = new RequestTestFilter(randomInt(), new RequestCallback() {
@Override
public <Request extends ActionRequest, Response extends ActionResponse> void execute(Task task, String action, Request request, ActionListener<Response> listener, ActionFilterChain<Request, Response> actionFilterChain) {
for (int i = 0; i <= additionalContinueCount; i++) {
actionFilterChain.proceed(task, action, request, listener);
}
}
});
Set<ActionFilter> filters = new HashSet<>();
filters.add(testFilter);
String actionName = randomAsciiOfLength(randomInt(30));
ActionFilters actionFilters = new ActionFilters(filters);
TransportAction<TestRequest, TestResponse> transportAction = new TransportAction<TestRequest, TestResponse>(Settings.EMPTY, actionName, null, actionFilters, null, new TaskManager(Settings.EMPTY)) {
@Override
protected void doExecute(TestRequest request, ActionListener<TestResponse> listener) {
listener.onResponse(new TestResponse());
}
};
final CountDownLatch latch = new CountDownLatch(additionalContinueCount + 1);
final AtomicInteger responses = new AtomicInteger();
final List<Throwable> failures = new CopyOnWriteArrayList<>();
transportAction.execute(new TestRequest(), new ActionListener<TestResponse>() {
@Override
public void onResponse(TestResponse testResponse) {
responses.incrementAndGet();
latch.countDown();
}
@Override
public void onFailure(Exception e) {
failures.add(e);
latch.countDown();
}
});
if (!latch.await(10, TimeUnit.SECONDS)) {
fail("timeout waiting for the filter to notify the listener as many times as expected");
}
assertThat(testFilter.runs.get(), equalTo(1));
assertThat(testFilter.lastActionName, equalTo(actionName));
assertThat(responses.get(), equalTo(1));
assertThat(failures.size(), equalTo(additionalContinueCount));
for (Throwable failure : failures) {
assertThat(failure, instanceOf(IllegalStateException.class));
}
}
use of java.util.concurrent.atomic.AtomicInteger in project elasticsearch by elastic.
the class IndexRecoveryIT method testDisconnectsDuringRecovery.
/**
* Tests scenario where recovery target successfully sends recovery request to source but then the channel gets closed while
* the source is working on the recovery process.
*/
@TestLogging("_root:DEBUG,org.elasticsearch.indices.recovery:TRACE")
public void testDisconnectsDuringRecovery() throws Exception {
boolean primaryRelocation = randomBoolean();
final String indexName = "test";
final Settings nodeSettings = Settings.builder().put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), TimeValue.timeValueMillis(randomIntBetween(0, 100))).build();
TimeValue disconnectAfterDelay = TimeValue.timeValueMillis(randomIntBetween(0, 100));
// start a master node
String masterNodeName = internalCluster().startMasterOnlyNode(nodeSettings);
final String blueNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build());
final String redNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build());
client().admin().indices().prepareCreate(indexName).setSettings(Settings.builder().put(IndexMetaData.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "blue").put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)).get();
List<IndexRequestBuilder> requests = new ArrayList<>();
int numDocs = scaledRandomIntBetween(25, 250);
for (int i = 0; i < numDocs; i++) {
requests.add(client().prepareIndex(indexName, "type").setSource("{}", XContentType.JSON));
}
indexRandom(true, requests);
ensureSearchable(indexName);
assertHitCount(client().prepareSearch(indexName).get(), numDocs);
MockTransportService masterTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, masterNodeName);
MockTransportService blueMockTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, blueNodeName);
MockTransportService redMockTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, redNodeName);
redMockTransportService.addDelegate(blueMockTransportService, new MockTransportService.DelegateTransport(redMockTransportService.original()) {
private final AtomicInteger count = new AtomicInteger();
@Override
protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
logger.info("--> sending request {} on {}", action, connection.getNode());
if (PeerRecoverySourceService.Actions.START_RECOVERY.equals(action) && count.incrementAndGet() == 1) {
// ensures that it's considered as valid recovery attempt by source
try {
awaitBusy(() -> client(blueNodeName).admin().cluster().prepareState().setLocal(true).get().getState().getRoutingTable().index("test").shard(0).getAllInitializingShards().isEmpty() == false);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
super.sendRequest(connection, requestId, action, request, options);
try {
Thread.sleep(disconnectAfterDelay.millis());
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
throw new ConnectTransportException(connection.getNode(), "DISCONNECT: simulation disconnect after successfully sending " + action + " request");
} else {
super.sendRequest(connection, requestId, action, request, options);
}
}
});
final AtomicBoolean finalized = new AtomicBoolean();
blueMockTransportService.addDelegate(redMockTransportService, new MockTransportService.DelegateTransport(blueMockTransportService.original()) {
@Override
protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
logger.info("--> sending request {} on {}", action, connection.getNode());
if (action.equals(PeerRecoveryTargetService.Actions.FINALIZE)) {
finalized.set(true);
}
super.sendRequest(connection, requestId, action, request, options);
}
});
for (MockTransportService mockTransportService : Arrays.asList(redMockTransportService, blueMockTransportService)) {
mockTransportService.addDelegate(masterTransportService, new MockTransportService.DelegateTransport(mockTransportService.original()) {
@Override
protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
logger.info("--> sending request {} on {}", action, connection.getNode());
if ((primaryRelocation && finalized.get()) == false) {
assertNotEquals(action, ShardStateAction.SHARD_FAILED_ACTION_NAME);
}
super.sendRequest(connection, requestId, action, request, options);
}
});
}
if (primaryRelocation) {
logger.info("--> starting primary relocation recovery from blue to red");
client().admin().indices().prepareUpdateSettings(indexName).setSettings(Settings.builder().put(IndexMetaData.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "red")).get();
// also waits for relocation / recovery to complete
ensureGreen();
// if a primary relocation fails after the source shard has been marked as relocated, both source and target are failed. If the
// source shard is moved back to started because the target fails first, it's possible that there is a cluster state where the
// shard is marked as started again (and ensureGreen returns), but while applying the cluster state the primary is failed and
// will be reallocated. The cluster will thus become green, then red, then green again. Triggering a refresh here before
// searching helps, as in contrast to search actions, refresh waits for the closed shard to be reallocated.
client().admin().indices().prepareRefresh(indexName).get();
} else {
logger.info("--> starting replica recovery from blue to red");
client().admin().indices().prepareUpdateSettings(indexName).setSettings(Settings.builder().put(IndexMetaData.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "red,blue").put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)).get();
ensureGreen();
}
for (int i = 0; i < 10; i++) {
assertHitCount(client().prepareSearch(indexName).get(), numDocs);
}
}
Aggregations