use of io.anserini.index.IndexArgs in project Anserini by castorini.
the class TweetEndToEndTest method getIndexArgs.
// Note that in the test cases, we have:
// {... "id":1,"id_str":"1","text":"RT This is a Retweet and will NOT NOT be indexed!" ... }
// {... "id":10,"id_str":"10","text":"This tweet won't be indexed since the maxId is 9" ... }
//
// src/test/resources/sample_docs/tweets/tweets1: 5 JSON objects, 2 deletes
// src/test/resources/sample_docs/tweets/tweets2: 4 JSON objects, 1 delete
//
// Thus, there should be a total of 4 documents indexed: 9 objects - 5 skipped
/**
 * Supplies the indexing arguments for the tweet end-to-end test.
 *
 * <p>Starts from {@code createDefaultIndexArgs()} and then overrides the input
 * path, the collection and generator class names, and the maximum tweet id.
 *
 * @return the configured {@link IndexArgs}
 */
@Override
protected IndexArgs getIndexArgs() {
  final IndexArgs args = createDefaultIndexArgs();

  // Tweets whose id exceeds this bound are expected to be left out of the index
  // (see the fixture notes preceding this method).
  args.tweetMaxId = 9L;

  // Collection and generator are selected by simple class name.
  args.generatorClass = TweetGenerator.class.getSimpleName();
  args.collectionClass = TweetCollection.class.getSimpleName();

  args.input = "src/test/resources/sample_docs/tweets/collection1";
  return args;
}
use of io.anserini.index.IndexArgs in project Anserini by castorini.
the class GeoGeneratorTest method riverSetUp.
/**
 * Builds the river fixture used by the tests in this class.
 *
 * <p>Creates a JSON object whose attributes are all stored as text nodes, wraps
 * it in a {@code JsonCollection.Document} (assigned to {@code geoDoc}), and runs
 * it through a {@code GeoGenerator} built from a fresh {@code IndexArgs}
 * (result assigned to {@code doc}).
 */
@Before
public void riverSetUp() {
  // Attribute name/value pairs, listed in the exact order they are inserted
  // into the JSON object.
  final String[][] riverAttributes = {
    {"HYRIV_ID", "90000003"},
    {"NEXT_DOWN", "0"},
    {"MAIN_RIV", "90000003"},
    {"LENGTH_KM", "3.02"},
    {"DIST_DN_KM", "0.0"},
    {"DIST_UP_KM", "35.3"},
    {"CATCH_SKM", "12.24"},
    {"UPLAND_SKM", "12.2"},
    {"ENDORHEIC", "0"},
    {"DIS_AV_CMS", "0.03"},
    {"ORD_STRA", "1"},
    {"ORD_CLAS", "1"},
    {"ORD_FLOW", "8"},
    {"HYBAS_L12", "9120016580"},
    {"geometry", "LINESTRING (-29.737500000000722 83.54583333333295, -29.731250000000642 83.55208333333294, -29.731250000000642 83.57291666666629)"},
    {"id", "90000003"},
  };

  ObjectNode riverJson = new ObjectMapper().createObjectNode();
  for (String[] attribute : riverAttributes) {
    // Every value, including the numeric-looking ones, is stored as a text node.
    riverJson.set(attribute[0], TextNode.valueOf(attribute[1]));
  }

  geoDoc = new JsonCollection.Document(riverJson);
  doc = new GeoGenerator(new IndexArgs()).createDocument(geoDoc);
}
use of io.anserini.index.IndexArgs in project Anserini by castorini.
the class BibtexEndToEndTest method getIndexArgs.
/**
 * Supplies the indexing arguments for the BibTeX end-to-end test.
 *
 * <p>Starts from {@code createDefaultIndexArgs()} and overrides the input path
 * plus the collection and generator class names.
 *
 * @return the configured {@link IndexArgs}
 */
@Override
protected IndexArgs getIndexArgs() {
  final IndexArgs args = createDefaultIndexArgs();

  // Collection and generator are selected by simple class name.
  args.generatorClass = BibtexGenerator.class.getSimpleName();
  args.collectionClass = BibtexCollection.class.getSimpleName();

  args.input = "src/test/resources/sample_docs/bib/acl";
  return args;
}
use of io.anserini.index.IndexArgs in project Anserini by castorini.
the class CoreEndToEndTest method getIndexArgs.
/**
 * Supplies the indexing arguments for the CORE end-to-end test.
 *
 * <p>Starts from {@code createDefaultIndexArgs()} and overrides the input path
 * plus the collection and generator class names.
 *
 * @return the configured {@link IndexArgs}
 */
@Override
protected IndexArgs getIndexArgs() {
  final IndexArgs args = createDefaultIndexArgs();

  // Collection and generator are selected by simple class name.
  args.generatorClass = CoreGenerator.class.getSimpleName();
  args.collectionClass = CoreCollection.class.getSimpleName();

  args.input = "src/test/resources/sample_docs/core";
  return args;
}
use of io.anserini.index.IndexArgs in project Anserini by castorini.
the class TrecEndToEndTest method getIndexArgs.
/**
 * Supplies the indexing arguments for the TREC end-to-end test.
 *
 * <p>Starts from {@code createDefaultIndexArgs()} and overrides only the input
 * path and the collection class name; every other setting keeps the value
 * provided by the defaults.
 *
 * @return the configured {@link IndexArgs}
 */
@Override
protected IndexArgs getIndexArgs() {
  final IndexArgs args = createDefaultIndexArgs();

  // The collection is selected by simple class name.
  args.collectionClass = TrecCollection.class.getSimpleName();

  args.input = "src/test/resources/sample_docs/trec/collection2";
  return args;
}
Aggregations