Use of org.nd4j.parameterserver.distributed.training.impl.SkipGramTrainer in project nd4j by deeplearning4j.
The class VoidParameterServerTest, method testNodeInitialization1.
/**
 * Brings up ten {@link VoidParameterServer} nodes concurrently, all sharing one
 * configuration, and verifies that each of them reports the {@code SHARD} role.
 *
 * @throws Exception if waiting for a worker thread is interrupted
 */
@Test
public void testNodeInitialization1() throws Exception {
    // failCnt counts nodes with an unexpected role; passCnt counts nodes that ran to completion.
    final AtomicInteger failCnt = new AtomicInteger(0);
    final AtomicInteger passCnt = new AtomicInteger(0);

    final VoidConfiguration conf = VoidConfiguration.builder()
            .unicastPort(34567)
            .multicastPort(45678)
            .numberOfShards(10)
            .shardAddresses(localIPs)
            .multicastNetwork("224.0.1.1")
            .ttl(4)
            .build();

    final Thread[] threads = new Thread[10];
    for (int idx = 0; idx < threads.length; idx++) {
        Thread worker = new Thread(() -> {
            VoidParameterServer server = new VoidParameterServer();
            server.init(conf, transport, new SkipGramTrainer());

            boolean isShard = server.getNodeRole() == NodeRole.SHARD;
            if (!isShard) {
                failCnt.incrementAndGet();
            }

            // Incremented for every node that got this far, whatever its role was.
            passCnt.incrementAndGet();
            server.shutdown();
        });
        threads[idx] = worker;
        worker.start();
    }

    for (Thread worker : threads) {
        worker.join();
    }

    assertEquals(0, failCnt.get());
    assertEquals(threads.length, passCnt.get());
}
Use of org.nd4j.parameterserver.distributed.training.impl.SkipGramTrainer in project nd4j by deeplearning4j.
The class VoidParameterServerTest, method testNodeRole1.
/**
 * Initializes a single {@link VoidParameterServer} with a ten-shard configuration and
 * checks that it reports the {@code SHARD} role.
 *
 * @throws Exception if node initialization or shutdown fails
 */
@Test
public void testNodeRole1() throws Exception {
    final VoidConfiguration conf = VoidConfiguration.builder()
            .unicastPort(34567)
            .multicastPort(45678)
            .numberOfShards(10)
            .multicastNetwork("224.0.1.1")
            .shardAddresses(localIPs)
            .ttl(4)
            .build();

    VoidParameterServer server = new VoidParameterServer();
    server.init(conf, transport, new SkipGramTrainer());

    NodeRole actualRole = server.getNodeRole();
    assertEquals(NodeRole.SHARD, actualRole);

    server.shutdown();
}
Use of org.nd4j.parameterserver.distributed.training.impl.SkipGramTrainer in project nd4j by deeplearning4j.
The class VoidParameterServerStressTest, method testPerformanceMulticast1.
/**
 * Latency benchmark for {@code getVector} over {@link MulticastTransport}.
 *
 * <p>Starts five shard nodes on 192.168.1.35 (ports 37890-37894) plus one client node,
 * initializes the sequence vectors, then runs eight worker threads that each issue
 * 100000 {@code getVector} calls for random indices inside their own slice of
 * {@code NUM_WORDS}. The median per-call latency is logged in microseconds.
 *
 * <p>All nodes are shut down in {@code finally} blocks so a failed run does not leave
 * them running.
 *
 * @throws Exception if a node fails to start or the test thread is interrupted while
 *         waiting for the workers
 */
@Test
@Ignore
public void testPerformanceMulticast1() throws Exception {
    VoidConfiguration voidConfiguration = VoidConfiguration.builder()
            .networkMask("192.168.0.0/16")
            .numberOfShards(1)
            .build();

    List<String> addresses = new ArrayList<>();
    for (int s = 0; s < 5; s++) {
        addresses.add("192.168.1.35:" + (37890 + s));
    }
    voidConfiguration.setShardAddresses(addresses);
    voidConfiguration.setForcedRole(NodeRole.CLIENT);

    VoidParameterServer[] shards = new VoidParameterServer[5];
    try {
        for (int s = 0; s < shards.length; s++) {
            int port = 37890 + s;

            VoidConfiguration shardConfiguration = VoidConfiguration.builder()
                    .unicastPort(port)
                    .networkMask("192.168.0.0/16")
                    .build();
            shardConfiguration.setShardAddresses(addresses);

            MulticastTransport transport = new MulticastTransport();
            transport.setIpAndPort("192.168.1.35", port);

            shards[s] = new VoidParameterServer(false);
            shards[s].setShardIndex((short) s);
            shards[s].init(shardConfiguration, transport, new SkipGramTrainer());
            assertEquals(NodeRole.SHARD, shards[s].getNodeRole());
        }

        // this is going to be our Client shard
        final VoidParameterServer parameterServer = new VoidParameterServer();
        try {
            parameterServer.init(voidConfiguration);
            // NOTE(review): this checks the singleton from getInstance(), not the
            // parameterServer instance created above - confirm that is intended.
            assertEquals(NodeRole.CLIENT, VoidParameterServer.getInstance().getNodeRole());
            log.info("Instantiation finished...");

            parameterServer.initializeSeqVec(100, NUM_WORDS, 123, 20, true, false);
            log.info("Initialization finished...");

            final List<Long> times = new CopyOnWriteArrayList<>();
            Thread[] threads = new Thread[8];
            for (int t = 0; t < threads.length; t++) {
                final int e = t;
                threads[t] = new Thread(() -> {
                    // Collect locally and publish once, to avoid a copy-on-write per sample.
                    List<Long> results = new ArrayList<>();
                    int chunk = NUM_WORDS / threads.length;
                    int start = e * chunk;
                    int end = (e + 1) * chunk;
                    for (int i = 0; i < 100000; i++) {
                        long time1 = System.nanoTime();
                        parameterServer.getVector(RandomUtils.nextInt(start, end));
                        long time2 = System.nanoTime();
                        results.add(time2 - time1);
                        if ((i + 1) % 1000 == 0) {
                            log.info("Thread {} cnt {}", e, i + 1);
                        }
                    }
                    times.addAll(results);
                });
                threads[t].setDaemon(true);
                threads[t].start();
            }

            // An interrupt here propagates and fails the test instead of being swallowed.
            for (Thread worker : threads) {
                worker.join();
            }

            List<Long> newTimes = new ArrayList<>(times);
            if (newTimes.isEmpty()) {
                // Workers publish results only after finishing their loop; an empty list
                // means none of them completed.
                throw new IllegalStateException("No latency samples were collected by the worker threads");
            }
            Collections.sort(newTimes);
            log.info("p50: {} us", newTimes.get(newTimes.size() / 2) / 1000);
        } finally {
            parameterServer.shutdown();
        }
    } finally {
        for (VoidParameterServer server : shards) {
            // Slots stay null if startup failed part-way through the loop.
            if (server != null) {
                server.shutdown();
            }
        }
    }
}
Use of org.nd4j.parameterserver.distributed.training.impl.SkipGramTrainer in project nd4j by deeplearning4j.
The class VoidParameterServerStressTest, method testPerformanceUnicast2.
/**
 * This is the second super-important test for unicast transport.
 * Here we send non-blocking messages.
 *
 * <p>Starts five shard nodes on 127.0.0.1 (ports 38380-38384) and one client node using
 * {@link RoutedTransport} with an {@link InterleavedRouter}. Four worker threads then
 * each send 200 frames of 128 skip-gram request messages through
 * {@code execDistributed}, timing every call. The median per-frame latency is logged in
 * microseconds.
 *
 * <p>Transports are shut down in a {@code finally} block, shards first and then the
 * client, so a failed run does not leave them open.
 *
 * @throws IllegalStateException if the test thread is interrupted while waiting for the
 *         workers, or if no worker produced any timing samples
 */
@Test
@Ignore
public void testPerformanceUnicast2() {
    List<String> list = new ArrayList<>();
    for (int t = 0; t < 5; t++) {
        list.add("127.0.0.1:" + (38380 + t));
    }

    VoidConfiguration voidConfiguration = VoidConfiguration.builder()
            .unicastPort(49823)
            .numberOfShards(list.size())
            .shardAddresses(list)
            .build();

    VoidParameterServer[] shards = new VoidParameterServer[list.size()];
    // Created up front so the finally block can reach it; it is initialized after the shards.
    final VoidParameterServer clientNode = new VoidParameterServer();
    try {
        for (int t = 0; t < shards.length; t++) {
            shards[t] = new VoidParameterServer(NodeRole.SHARD);

            Transport shardTransport = new RoutedTransport();
            shardTransport.setIpAndPort("127.0.0.1", 38380 + t);

            shards[t].setShardIndex((short) t);
            shards[t].init(voidConfiguration, shardTransport, new SkipGramTrainer());
            assertEquals(NodeRole.SHARD, shards[t].getNodeRole());
        }

        RoutedTransport clientTransport = new RoutedTransport();
        ClientRouter router = new InterleavedRouter(0);
        clientTransport.setRouter(router);
        clientTransport.setIpAndPort("127.0.0.1", voidConfiguration.getUnicastPort());
        router.init(voidConfiguration, clientTransport);

        clientNode.init(voidConfiguration, clientTransport, new SkipGramTrainer());
        assertEquals(NodeRole.CLIENT, clientNode.getNodeRole());

        final List<Long> times = new CopyOnWriteArrayList<>();

        // at this point, everything should be started, time for tests
        clientNode.initializeSeqVec(100, NUM_WORDS, 123, 25, true, false);
        log.info("Initialization finished, going to tests...");

        Thread[] threads = new Thread[4];
        for (int t = 0; t < threads.length; t++) {
            final int e = t;
            threads[t] = new Thread(() -> {
                // Collect locally and publish once, to avoid a copy-on-write per sample.
                List<Long> results = new ArrayList<>();
                for (int i = 0; i < 200; i++) {
                    Frame<SkipGramRequestMessage> frame = new Frame<>(BasicSequenceProvider.getInstance().getNextValue());
                    for (int f = 0; f < 128; f++) {
                        frame.stackMessage(getSGRM());
                    }

                    // Only the distributed execution is timed, not frame construction.
                    long time1 = System.nanoTime();
                    clientNode.execDistributed(frame);
                    long time2 = System.nanoTime();
                    results.add(time2 - time1);

                    if ((i + 1) % 100 == 0) {
                        log.info("Thread {} cnt {}", e, i + 1);
                    }
                }
                times.addAll(results);
            });
            threads[t].setDaemon(true);
            threads[t].start();
        }

        try {
            for (Thread worker : threads) {
                worker.join();
            }
        } catch (InterruptedException ex) {
            // Restore the flag and fail: the collected timings would be incomplete.
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Interrupted while waiting for benchmark threads", ex);
        }

        List<Long> newTimes = new ArrayList<>(times);
        if (newTimes.isEmpty()) {
            // Workers publish results only after finishing their loop; an empty list
            // means none of them completed.
            throw new IllegalStateException("No latency samples were collected by the worker threads");
        }
        Collections.sort(newTimes);
        log.info("p50: {} us", newTimes.get(newTimes.size() / 2) / 1000);
    } finally {
        // shutdown everything
        for (VoidParameterServer shard : shards) {
            // Slots or transports may be missing if startup failed part-way through.
            if (shard != null && shard.getTransport() != null) {
                shard.getTransport().shutdown();
            }
        }
        if (clientNode.getTransport() != null) {
            clientNode.getTransport().shutdown();
        }
    }
}
Aggregations