Use of zemberek.core.collections.IntVector in the zemberek-nlp project by ahmetaa.
Class Model, method hierarchicalSoftmax.
/**
 * Sums the values returned by {@code binaryLogistic} for every entry on the path stored for
 * {@code target}.
 *
 * <p>For each position on the path, the node id comes from {@code paths} and the label is
 * {@code true} when the entry of {@code codes} at the same position equals 1.
 *
 * @param grad_ gradient vector, forwarded unchanged to {@code binaryLogistic}
 * @param hidden hidden vector, forwarded unchanged to {@code binaryLogistic}
 * @param target index used to look up the code and the path
 * @param lr learning rate, forwarded unchanged to {@code binaryLogistic}
 * @return the accumulated loss over the whole path
 */
private float hierarchicalSoftmax(Vector grad_, Vector hidden, int target, float lr) {
  final IntVector bits = codes.get(target);
  final IntVector nodes = paths.get(target);
  float total = 0.0f;
  int step = 0;
  while (step < nodes.size()) {
    int node = nodes.get(step);
    boolean label = bits.get(step) == 1;
    total += binaryLogistic(grad_, hidden, node, label, lr);
    step++;
  }
  return total;
}
Use of zemberek.core.collections.IntVector in the zemberek-nlp project by ahmetaa.
Class Model, method initTableNegatives.
/**
 * Builds the {@code negatives} table from the given counts.
 *
 * <p>Each index {@code i} is appended to a temporary vector as long as the running repeat
 * counter stays below {@code sqrt(counts[i]) * NEGATIVE_TABLE_SIZE / z}, where {@code z} is the
 * sum of the square roots of all counts. The vector is then shuffled with {@code rng} and a
 * copy of its content is stored in {@code negatives}.
 *
 * @param counts one count per index
 */
private void initTableNegatives(long[] counts) {
  // Normalizer: sum of count^0.5 over all entries, accumulated in float precision.
  float normalizer = 0.0f;
  for (int k = 0; k < counts.length; k++) {
    normalizer += (float) Math.pow(counts[k], 0.5);
  }

  IntVector table = new IntVector(counts.length * 10);
  int index = 0;
  while (index < counts.length) {
    float weight = (float) Math.pow(counts[index], 0.5);
    int repeats = 0;
    while (repeats < weight * NEGATIVE_TABLE_SIZE / normalizer) {
      table.add(index);
      repeats++;
    }
    index++;
  }

  table.shuffle(rng);
  negatives = table.copyOf();
}
Use of zemberek.core.collections.IntVector in the zemberek-nlp project by ahmetaa.
Class ProductQuantizer, method train.
/**
 * Trains the centroids of every sub-quantizer by running {@code kmeans} on a slice of
 * {@code x}.
 *
 * <p>For each of the {@code nsubq_} sub-quantizers, {@code np = min(n, max_points_)} rows are
 * selected through a permutation of the row indices, the sub-vector belonging to that
 * sub-quantizer is copied out of each selected row into a scratch buffer, and {@code kmeans}
 * is invoked on the buffer. The permutation is reshuffled only when fewer than {@code n} rows
 * are used.
 *
 * @param n number of rows in {@code x}
 * @param x row data; rows are addressed with a stride of {@code dim_}
 * @throws IllegalArgumentException if {@code n} is smaller than {@code ksub_}
 */
void train(int n, float[] x) {
  if (n < ksub_) {
    // Report the real threshold instead of a hard-coded number; n == ksub_ is accepted.
    throw new IllegalArgumentException(
        "Matrix too small for quantization, must have at least " + ksub_
            + " rows. But it is " + n);
  }
  // Build the identity permutation 0..n-1 by appending, so the vector's logical size is n and
  // shuffle() has elements to permute.
  // NOTE(review): assumes IntVector(int) only reserves capacity and starts empty - confirm.
  IntVector perm = new IntVector(n);
  for (int i = 0; i < n; i++) {
    perm.add(i);
  }
  int d = dsub_;
  int np = Math.min(n, max_points_);
  float[] xslice = new float[np * dsub_];
  for (int m = 0; m < nsubq_; m++) {
    if (m == nsubq_ - 1) {
      // The last sub-quantizer uses its own width.
      d = lastdsub_;
    }
    if (np != n) {
      // Only a subset of the rows is used, so pick a fresh random subset.
      perm.shuffle(rng);
    }
    for (int j = 0; j < np; j++) {
      System.arraycopy(x, perm.get(j) * dim_ + m * dsub_, xslice, j * d, d);
    }
    kmeans(new FArray(xslice), get_centroids(m, (byte) 0), np, d);
  }
}
Use of zemberek.core.collections.IntVector in the zemberek-nlp project by ahmetaa.
Class ProductQuantizer, method kmeans.
/**
 * Runs {@code niter_} rounds of {@code Estep}/{@code MStep} on {@code x}, writing centroids
 * into {@code c}.
 *
 * <p>The {@code ksub_} centroids are seeded by copying {@code d} values per centroid from
 * {@code x}, at positions chosen through a shuffled permutation of {@code 0..n-1}.
 *
 * @param x source data, addressed in chunks of {@code d} values
 * @param c destination for the centroids, addressed in chunks of {@code d} values
 * @param n number of chunks (points) in {@code x}
 * @param d number of values per point
 */
void kmeans(FArray x, FArray c, int n, int d) {
  // Fill the permutation by appending and bound the loop by n: right after construction the
  // vector's size() may be 0, which would leave it empty and make shuffle() a no-op.
  // NOTE(review): assumes IntVector(int) only reserves capacity and starts empty - confirm.
  IntVector iv = new IntVector(n);
  for (int i = 0; i < n; i++) {
    iv.add(i);
  }
  iv.shuffle(rng);
  // Seed each centroid from a randomly chosen point.
  for (int i = 0; i < ksub_; i++) {
    x.arrayCopy(iv.get(i) * d, c, i * d, d);
  }
  BArray codes = new BArray(new byte[n]);
  for (int i = 0; i < niter_; i++) {
    Estep(x, c, codes, d, n);
    MStep(x, c, codes, d, n);
  }
}
Use of zemberek.core.collections.IntVector in the zemberek-nlp project by ahmetaa.
Class FastText, method sentenceVector.
/**
 * Computes a vector for the given sentence.
 *
 * <p>The sentence is converted to ids with {@code dict_.getLine}. For a supervised model the
 * input vector of every id is added through {@code addInputVector} and the sum is scaled by
 * {@code 1 / line size}. Otherwise word n-gram hashes are appended to the ids, each id's word
 * vector is scaled to unit norm (vectors with a non-positive norm are skipped), and the sum is
 * scaled by one over the number of vectors that were added.
 *
 * @param s the sentence
 * @return the data of the resulting vector; it is left unscaled when nothing was added
 */
public float[] sentenceVector(String s) {
  final Vector sentence = new Vector(args_.dim);
  final IntVector tokens = new IntVector();
  dict_.getLine(s, tokens, model_.getRng());

  if (args_.model == model_name.supervised) {
    for (int id : tokens.copyOf()) {
      addInputVector(sentence, id);
    }
    if (tokens.size() > 0) {
      sentence.mul(1f / tokens.size());
    }
    return sentence.getData();
  }

  dict_.addWordNgramHashes(tokens, args_.wordNgrams);
  if (tokens.size() != 0) {
    int used = 0;
    for (int id : tokens.copyOf()) {
      Vector wordVector = getWordVector(dict_.getWord(id));
      float length = wordVector.norm();
      if (length > 0) {
        // Normalize before accumulating so every word contributes a unit-length vector.
        wordVector.mul(1f / length);
        sentence.addVector(wordVector);
        used++;
      }
    }
    if (used > 0) {
      sentence.mul(1f / used);
    }
  }
  return sentence.getData();
}
Aggregations