Use of de.ids_mannheim.korap.index.MultiTermToken in project Krill by KorAP.
From class Test, method getTermVector.
/**
 * Builds a {@link MultiTermTokenStream} from a compact textual token description.
 *
 * <p>The input is split on single spaces into tokens, and every token is split on
 * {@code |} into fields. For each token one {@link MultiTermToken} is created with
 * the character {@code 's'} and field 0, and the following terms are added in order:
 * <ul>
 * <li>the literal term {@code "T"},</li>
 * <li>{@code 'i'} with field 0 lower-cased (locale-independently),</li>
 * <li>{@code 'p'} with field 1,</li>
 * <li>{@code 'l'} with field 2,</li>
 * <li>{@code 'm'} with each {@code ;}-separated part of field 3, but only when the
 * token has exactly four fields,</li>
 * <li>{@code 'e'} with field 4, but only when the token has exactly five fields.</li>
 * </ul>
 * The characters presumably denote surface form, case-insensitive form, part of
 * speech, lemma, morphology and an extra annotation -- verify against
 * {@code MultiTermToken}.
 *
 * <p>If building a token raises a {@link CorpusDataException}, {@code fail(...)} is
 * called with the error code and message.
 *
 * @param stream space-separated tokens; each token needs at least three
 *        {@code |}-separated fields, otherwise an
 *        {@link ArrayIndexOutOfBoundsException} is thrown
 * @return the stream holding the tokens that were built
 */
public static MultiTermTokenStream getTermVector(String stream) {
    MultiTermTokenStream ts = new MultiTermTokenStream();
    for (String seg : stream.split(" ")) {
        String[] tokseg = seg.split("\\|");
        try {
            MultiTermToken mtt = new MultiTermToken('s', tokseg[0]);
            mtt.add("T");
            // Locale.ROOT keeps the lower-cased term identical on every JVM,
            // independent of the default locale (e.g. Turkish dotless i).
            mtt.add('i', tokseg[0].toLowerCase(java.util.Locale.ROOT));
            mtt.add('p', tokseg[1]);
            mtt.add('l', tokseg[2]);
            // NOTE(review): with exactly five fields, field 3 is not added at all.
            // Kept as in the original; confirm whether ">= 4" was intended.
            if (tokseg.length == 4) {
                for (String morph : tokseg[3].split(";")) {
                    mtt.add('m', morph);
                }
            }
            if (tokseg.length == 5) {
                mtt.add('e', tokseg[4]);
            }
            ts.addMultiTermToken(mtt);
        } catch (CorpusDataException cde) {
            fail(cde.getErrorCode() + ": " + cde.getMessage());
        }
    }
    return ts;
}
Use of de.ids_mannheim.korap.index.MultiTermToken in project Krill by KorAP.
From class TestIndex, method multiTermToken.
/**
 * Checks that a {@link MultiTermToken} built from several term strings exposes
 * those strings through {@code terms}, in the order they were passed.
 *
 * @throws CorpusDataException if constructing the token fails
 */
@Test
public void multiTermToken() throws CorpusDataException {
    MultiTermToken test = new MultiTermToken("hunde", "pos:n", "m:gen:pl");
    // assertEquals takes (expected, actual); keeping that order makes a
    // failure message report the right value as "expected".
    assertEquals("hunde", test.terms.get(0).term);
    assertEquals("pos:n", test.terms.get(1).term);
    assertEquals("m:gen:pl", test.terms.get(2).term);

    // Build a second token from the same terms and check it the same way.
    test = new MultiTermToken("hunde", "pos:n", "m:gen:pl");
    assertEquals("hunde", test.terms.get(0).term);
    assertEquals("pos:n", test.terms.get(1).term);
    assertEquals("m:gen:pl", test.terms.get(2).term);
}
Use of de.ids_mannheim.korap.index.MultiTermToken in project Krill by KorAP.
From class FieldDocument, method setFields.
/**
 * Populates this document from the legacy serialization layout.
 *
 * <p>The first element of {@code fields} is removed from the list and its
 * {@code "primaryData"} entry is set as primary data. Each remaining element
 * describes one field: its {@code "data"} entry is a list of tokens, each token
 * being a list of term strings. A token's first term is removed from its list and
 * used to create the {@link MultiTermToken}; the remaining terms are added to it.
 * Tokens that raise a {@link CorpusDataException} are left out of the stream and
 * the error is recorded through {@code addError}.
 *
 * <p>Optional {@code "foundries"} and {@code "tokenization"} entries are stored on
 * the document, and finally the token stream is registered under the field's
 * {@code "name"} together with the primary data.
 *
 * @param fields the legacy field list; the list itself and the contained token
 *        lists are modified by this call
 */
public void setFields(ArrayList<Map<String, Object>> fields) {
    // The leading entry carries the primary text and is taken off the list,
    // so the loop below only sees real field descriptions.
    Map<String, Object> primaryEntry = fields.remove(0);
    this.setPrimaryData((String) primaryEntry.get("primaryData"));

    for (Map<String, Object> fieldEntry : fields) {
        String name = (String) fieldEntry.get("name");
        MultiTermTokenStream tokenStream = this.newMultiTermTokenStream();

        ArrayList<ArrayList<String>> tokens =
                (ArrayList<ArrayList<String>>) fieldEntry.get("data");
        for (ArrayList<String> termList : tokens) {
            try {
                // The head term creates the token; the rest are appended.
                String head = termList.remove(0);
                MultiTermToken multiTerm = new MultiTermToken(head);
                for (String rest : termList) {
                    multiTerm.add(rest);
                }
                tokenStream.addMultiTermToken(multiTerm);
            } catch (CorpusDataException cde) {
                // A malformed token is reported and skipped, not fatal.
                this.addError(cde.getErrorCode(), cde.getMessage());
            }
        }

        // NOTE(review): the original remark here suggested keeping these values
        // as meta fields in the tokenization term vector -- confirm intent.
        if (fieldEntry.containsKey("foundries")) {
            // TODO: Do not store positions!
            String foundryValue = (String) fieldEntry.get("foundries");
            this.addKeyword("foundries", foundryValue);
            super.setFoundries(foundryValue);
        }

        if (fieldEntry.containsKey("tokenization")) {
            String tokenizationValue = (String) fieldEntry.get("tokenization");
            this.addString("tokenization", tokenizationValue);
            super.setTokenization(tokenizationValue);
        }

        this.addTV(name, this.getPrimaryData(), tokenStream);
    }
}
Use of de.ids_mannheim.korap.index.MultiTermToken in project Krill by KorAP.
From class FieldDocument, method setData.
/**
 * Populates this document from a deserialized token stream node.
 *
 * <p>The node's {@code "text"} entry is set as primary data. Its {@code "stream"}
 * entry is a list of tokens, each token being a list of term strings: the first
 * term is removed from the token's list and used to create the
 * {@link MultiTermToken}, the remaining terms are added to it. Tokens that raise
 * a {@link CorpusDataException} are left out of the stream and the error is
 * recorded through {@code addError}. The resulting stream is registered under
 * the node's {@code "name"}; afterwards the optional {@code "foundries"},
 * {@code "layerInfos"} and {@code "tokenSource"} entries are applied when present.
 *
 * @param node the deserialized node; the token lists inside {@code "stream"} are
 *        modified by this call
 */
public void setData(Map<String, Object> node) {
    this.setPrimaryData((String) node.get("text"));
    String name = (String) node.get("name");
    MultiTermTokenStream tokenStream = this.newMultiTermTokenStream();

    // Walk every token of the serialized stream.
    ArrayList<ArrayList<String>> tokens =
            (ArrayList<ArrayList<String>>) node.get("stream");
    for (ArrayList<String> termList : tokens) {
        try {
            // The head term creates the token; the rest are appended.
            String head = termList.remove(0);
            MultiTermToken multiTerm = new MultiTermToken(head);
            for (String rest : termList) {
                multiTerm.add(rest);
            }
            tokenStream.addMultiTermToken(multiTerm);
        } catch (CorpusDataException cde) {
            // A malformed token is reported and skipped, not fatal.
            this.addError(cde.getErrorCode(), cde.getMessage());
        }
    }

    // Register the token stream on this document.
    this.addTV(name, this.getPrimaryData(), tokenStream);

    // Optional metadata entries.
    if (node.containsKey("foundries")) {
        this.setFoundries((String) node.get("foundries"));
    }
    if (node.containsKey("layerInfos")) {
        this.setLayerInfos((String) node.get("layerInfos"));
    }
    if (node.containsKey("tokenSource")) {
        this.setTokenSource((String) node.get("tokenSource"));
    }
}
Aggregations