Search in sources :

Example 1 with IntVector

use of zemberek.core.collections.IntVector in project zemberek-nlp by ahmetaa.

the class Model method hierarchicalSoftmax.

/**
 * Sums the values returned by {@code binaryLogistic} over every entry of the path stored for
 * {@code target}.
 *
 * <p>For position {@code k}, the node comes from {@code paths.get(target)} and the boolean
 * label is {@code true} when the entry at the same position of {@code codes.get(target)}
 * equals 1.
 *
 * @param grad_ forwarded unchanged to {@code binaryLogistic}
 * @param hidden forwarded unchanged to {@code binaryLogistic}
 * @param target index used to look up the code and the path
 * @param lr forwarded unchanged to {@code binaryLogistic} (presumably the learning rate;
 *     confirm against the caller)
 * @return the accumulated sum, 0 when the path is empty
 */
private float hierarchicalSoftmax(Vector grad_, Vector hidden, int target, float lr) {
    IntVector bits = codes.get(target);
    IntVector nodes = paths.get(target);
    float total = 0.0f;
    int step = 0;
    // The path length drives the loop; the code is only indexed, never measured.
    while (step < nodes.size()) {
        int node = nodes.get(step);
        boolean label = bits.get(step) == 1;
        total += binaryLogistic(grad_, hidden, node, label, lr);
        step++;
    }
    return total;
}
Also used : IntVector(zemberek.core.collections.IntVector)

Example 2 with IntVector

use of zemberek.core.collections.IntVector in project zemberek-nlp by ahmetaa.

the class Model method initTableNegatives.

/**
 * Rebuilds the {@code negatives} table from the given counts.
 *
 * <p>Each index {@code i} is appended to a temporary vector repeatedly, the number of
 * repetitions being governed by {@code sqrt(counts[i]) * NEGATIVE_TABLE_SIZE / z}, where
 * {@code z} is the sum of the square roots of all counts. The vector is then shuffled with
 * {@code rng} and a copy of its content is stored in {@code negatives}.
 *
 * @param counts one count per index; the index itself is what gets stored in the table
 */
private void initTableNegatives(long[] counts) {
    IntVector table = new IntVector(counts.length * 10);

    // Normalizer: sum of square roots of all counts, accumulated in float precision.
    float z = 0.0f;
    for (int k = 0; k < counts.length; k++) {
        z += (float) Math.pow(counts[k], 0.5);
    }

    int index = 0;
    while (index < counts.length) {
        float root = (float) Math.pow(counts[index], 0.5);
        int added = 0;
        // The bound is kept inline so the comparison is carried out exactly as before.
        while (added < root * NEGATIVE_TABLE_SIZE / z) {
            table.add(index);
            added++;
        }
        index++;
    }

    table.shuffle(rng);
    negatives = table.copyOf();
}
Also used : IntVector(zemberek.core.collections.IntVector)

Example 3 with IntVector

use of zemberek.core.collections.IntVector in project zemberek-nlp by ahmetaa.

the class ProductQuantizer method train.

/**
 * Runs {@code kmeans} once per sub-quantizer on slices copied out of {@code x}.
 *
 * <p>For every sub-quantizer {@code m} in {@code [0, nsubq_)}, {@code np = min(n, max_points_)}
 * rows are selected through a permutation vector, the {@code d} values starting at column
 * {@code m * dsub_} of each selected row are copied into a scratch buffer, and {@code kmeans}
 * is invoked with the centroids returned by {@code get_centroids(m, (byte) 0)}. The slice
 * width {@code d} is {@code dsub_}, except for the last sub-quantizer where it is
 * {@code lastdsub_}.
 *
 * @param n number of rows; must be at least {@code ksub_}
 * @param x source values, read with a row stride of {@code dim_}
 * @throws IllegalArgumentException if {@code n < ksub_}
 */
void train(int n, float[] x) {
    if (n < ksub_) {
        // The message reports the real threshold: the guard accepts n == ksub_, and the limit is
        // ksub_ rather than a fixed 256.
        throw new IllegalArgumentException("Matrix too small for quantization, must have at least "
            + ksub_ + " rows. But it is " + n);
    }
    // NOTE(review): assumes new IntVector(n) yields n addressable elements so that set(i, i)
    // builds the identity permutation; if the argument is only a capacity hint, confirm that
    // set/shuffle still behave as intended.
    IntVector perm = new IntVector(n);
    for (int i = 0; i < n; i++) {
        perm.set(i, i);
    }
    int d = dsub_;
    int np = Math.min(n, max_points_);
    // Scratch buffer sized for the widest slice used here (dsub_ values per row).
    float[] xslice = new float[np * dsub_];
    for (int m = 0; m < nsubq_; m++) {
        if (m == nsubq_ - 1) {
            d = lastdsub_;
        }
        // Re-shuffle only when a subset of the rows is used.
        if (np != n) {
            perm.shuffle(rng);
        }
        for (int j = 0; j < np; j++) {
            System.arraycopy(x, perm.get(j) * dim_ + m * dsub_, xslice, j * d, d);
        }
        kmeans(new FArray(xslice), get_centroids(m, (byte) 0), np, d);
    }
}
Also used : IntVector(zemberek.core.collections.IntVector)

Example 4 with IntVector

use of zemberek.core.collections.IntVector in project zemberek-nlp by ahmetaa.

the class ProductQuantizer method kmeans.

/**
 * Seeds the centroids in {@code c} from {@code x} and then alternates {@code Estep} and
 * {@code MStep} for {@code niter_} rounds.
 *
 * <p>Seeding copies {@code ksub_} blocks of {@code d} values from {@code x} into {@code c};
 * the source block for centroid {@code k} is chosen through a shuffled index vector.
 *
 * @param x source values, addressed in blocks of {@code d}
 * @param c destination for the centroids, addressed in blocks of {@code d}
 * @param n length of the code buffer handed to {@code Estep}/{@code MStep}
 * @param d number of values per block
 */
void kmeans(FArray x, FArray c, int n, int d) {
    IntVector order = new IntVector(n);
    // NOTE(review): the fill loop is bounded by order.size(), not n. If new IntVector(n) only
    // reserves capacity, this loop may not run at all — confirm against IntVector.
    int slot = 0;
    while (slot < order.size()) {
        order.set(slot, slot);
        slot++;
    }
    order.shuffle(rng);

    for (int k = 0; k < ksub_; k++) {
        int srcOffset = order.get(k) * d;
        int dstOffset = k * d;
        x.arrayCopy(srcOffset, c, dstOffset, d);
    }

    BArray assignments = new BArray(new byte[n]);
    for (int round = 0; round < niter_; round++) {
        Estep(x, c, assignments, d, n);
        MStep(x, c, assignments, d, n);
    }
}
Also used : IntVector(zemberek.core.collections.IntVector)

Example 5 with IntVector

use of zemberek.core.collections.IntVector in project zemberek-nlp by ahmetaa.

the class FastText method sentenceVector.

/**
 * Builds a vector of size {@code args_.dim} for the given text and returns its backing data.
 *
 * <p>When {@code args_.model} is {@code model_name.supervised}, every id produced by
 * {@code dict_.getLine} is fed to {@code addInputVector} and the result is scaled by
 * {@code 1 / line.size()} if any id was produced.
 *
 * <p>Otherwise word n-gram hashes are appended to the ids, each id is mapped through
 * {@code dict_.getWord} and {@code getWordVector}, vectors with a positive norm are scaled by
 * the inverse of that norm and summed, and the sum is scaled by the inverse of the number of
 * vectors that were added.
 *
 * @param s the input text
 * @return the data of the resulting vector; left as constructed when nothing was added
 */
public float[] sentenceVector(String s) {
    Vector svec = new Vector(args_.dim);
    // Decide the mode first so the sequence of calls is the same as in the branch-local form.
    final boolean supervised = args_.model == model_name.supervised;

    IntVector line = new IntVector();
    dict_.getLine(s, line, model_.getRng());

    if (supervised) {
        for (int id : line.copyOf()) {
            addInputVector(svec, id);
        }
        if (line.size() > 0) {
            svec.mul(1f / line.size());
        }
        return svec.getData();
    }

    dict_.addWordNgramHashes(line, args_.wordNgrams);
    if (line.size() != 0) {
        int added = 0;
        for (int id : line.copyOf()) {
            Vector wordVec = getWordVector(dict_.getWord(id));
            float length = wordVec.norm();
            // Only vectors with a strictly positive norm contribute.
            if (length > 0) {
                wordVec.mul(1f / length);
                svec.addVector(wordVec);
                added++;
            }
        }
        if (added > 0) {
            svec.mul(1f / added);
        }
    }
    return svec.getData();
}
Also used : IntVector(zemberek.core.collections.IntVector)

Aggregations

IntVector (zemberek.core.collections.IntVector): 21, BufferedReader (java.io.BufferedReader): 3, ArrayList (java.util.ArrayList): 2, ScoredItem (zemberek.core.ScoredItem): 2