Search in sources :

Example 61 with ClassPathResource

use of org.datavec.api.util.ClassPathResource in project deeplearning4j by deeplearning4j.

the class DefaultDocumentIteratorTest method testDocumentIterator.

/**
 * Checks that the leading tokens of the first document served by a
 * FileDocumentIterator over the "/reuters/5250" classpath resource match
 * the words of "PEARSON CONCENTRATES ON FOUR SECTORS", in order.
 *
 * @throws Exception if any step of the test raises a checked exception
 */
@Test
public void testDocumentIterator() throws Exception {
    ClassPathResource reuters5250 = new ClassPathResource("/reuters/5250");
    File f = reuters5250.getFile();
    DocumentIterator iter = new FileDocumentIterator(f.getAbsolutePath());
    // Expected leading tokens of the document: PEARSON CONCENTRATES ON FOUR SECTORS
    String[] list = "PEARSON CONCENTRATES ON FOUR SECTORS".split(" ");
    // try-with-resources guarantees the stream is closed even when an
    // assertion fails or the tokenizer throws part-way through the loop.
    try (InputStream doc = iter.nextDocument()) {
        TokenizerFactory t = new DefaultTokenizerFactory();
        Tokenizer next = t.create(doc);
        int count = 0;
        // Compare token by token, stopping at whichever runs out first.
        while (next.hasMoreTokens() && count < list.length) {
            String token = next.nextToken();
            assertEquals(list[count++], token);
        }
    }
}
Also used : DefaultTokenizerFactory(org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory) TokenizerFactory(org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory) InputStream(java.io.InputStream) File(java.io.File) Tokenizer(org.deeplearning4j.text.tokenization.tokenizer.Tokenizer) ClassPathResource(org.datavec.api.util.ClassPathResource) Test(org.junit.Test)

Example 62 with ClassPathResource

use of org.datavec.api.util.ClassPathResource in project deeplearning4j by deeplearning4j.

the class FileDocumentIteratorTest method testNextDocument.

/**
 * Verifies how many documents the DocumentIterator hands out for the
 * "/reuters/5250" classpath resource.
 *
 * @throws Exception
 */
@Test
public void testNextDocument() throws Exception {
    File source = new ClassPathResource("/reuters/5250").getFile();
    String sourcePath = source.getAbsolutePath();
    DocumentIterator documents = new FileDocumentIterator(sourcePath);
    log.info(sourcePath);

    // Drain the iterator, releasing each stream as soon as it is handed out.
    int seen = 0;
    for (; documents.hasNext(); seen++) {
        InputStream current = documents.nextDocument();
        current.close();
    }

    assertEquals(24, seen);
}
Also used : InputStream(java.io.InputStream) File(java.io.File) ClassPathResource(org.datavec.api.util.ClassPathResource) Test(org.junit.Test)

Example 63 with ClassPathResource

use of org.datavec.api.util.ClassPathResource in project deeplearning4j by deeplearning4j.

the class FileLabelAwareIteratorTest method testExtractLabelFromPath2.

/**
 * Builds a FileLabelAwareIterator over the "/labeled" and "/rootdir"
 * classpath folders and checks the number of documents, the number of
 * labels used, and the presence of each expected label.
 */
@Test
public void testExtractLabelFromPath2() throws Exception {
    ClassPathResource labeledFolder = new ClassPathResource("/labeled");
    ClassPathResource rootFolder = new ClassPathResource("/rootdir");
    FileLabelAwareIterator docs = new FileLabelAwareIterator.Builder()
            .addSourceFolder(labeledFolder.getFile())
            .addSourceFolder(rootFolder.getFile())
            .build();

    // Every document must be present and carry both content and a label.
    int seen = 0;
    for (; docs.hasNextDocument(); seen++) {
        LabelledDocument current = docs.nextDocument();
        assertNotEquals(null, current);
        assertNotEquals(null, current.getContent());
        assertNotEquals(null, current.getLabel());
    }

    assertEquals(5, seen);
    assertEquals(5, docs.getLabelsSource().getNumberOfLabelsUsed());

    // Checked in the same order as the individual assertions they replace.
    String[] expectedLabels = {"positive", "negative", "neutral", "label1", "label2"};
    for (String expected : expectedLabels) {
        assertTrue(docs.getLabelsSource().getLabels().contains(expected));
    }
}
Also used : ClassPathResource(org.datavec.api.util.ClassPathResource) Test(org.junit.Test)

Example 64 with ClassPathResource

use of org.datavec.api.util.ClassPathResource in project deeplearning4j by deeplearning4j.

the class FileLabelAwareIteratorTest method testExtractLabelFromPath1.

/**
 * Builds a FileLabelAwareIterator over the "/labeled" classpath folder and
 * checks the number of documents, the number of labels used, and the
 * presence of each expected label.
 */
@Test
public void testExtractLabelFromPath1() throws Exception {
    ClassPathResource labeledFolder = new ClassPathResource("/labeled");
    FileLabelAwareIterator docs = new FileLabelAwareIterator.Builder()
            .addSourceFolder(labeledFolder.getFile())
            .build();

    // Every document must be present and carry both content and a label.
    int seen = 0;
    for (; docs.hasNextDocument(); seen++) {
        LabelledDocument current = docs.nextDocument();
        assertNotEquals(null, current);
        assertNotEquals(null, current.getContent());
        assertNotEquals(null, current.getLabel());
    }

    assertEquals(3, seen);
    assertEquals(3, docs.getLabelsSource().getNumberOfLabelsUsed());

    // Checked in the same order as the individual assertions they replace.
    String[] expectedLabels = {"positive", "negative", "neutral"};
    for (String expected : expectedLabels) {
        assertTrue(docs.getLabelsSource().getLabels().contains(expected));
    }
}
Also used : ClassPathResource(org.datavec.api.util.ClassPathResource) Test(org.junit.Test)

Example 65 with ClassPathResource

use of org.datavec.api.util.ClassPathResource in project deeplearning4j by deeplearning4j.

the class AggregatingSentenceIteratorTest method testHasNext.

/**
 * Aggregates two BasicLineIterators over the same "/big/raw_sentences.txt"
 * file and checks the sentence count of a full pass, then of a second full
 * pass after reset(). The counter is cumulative across both passes.
 */
@Test
public void testHasNext() throws Exception {
    File sentences = new ClassPathResource("/big/raw_sentences.txt").getFile();
    BasicLineIterator first = new BasicLineIterator(sentences);
    BasicLineIterator second = new BasicLineIterator(sentences);
    AggregatingSentenceIterator combined = new AggregatingSentenceIterator.Builder()
            .addSentenceIterator(first)
            .addSentenceIterator(second)
            .build();

    // First pass: both underlying iterators are consumed once.
    int total = 0;
    for (; combined.hasNext(); total++) {
        combined.nextSentence();
    }
    assertEquals((97162 * 2), total);

    // Second pass after reset; the counter keeps accumulating on purpose.
    combined.reset();
    for (; combined.hasNext(); total++) {
        combined.nextSentence();
    }
    assertEquals((97162 * 4), total);
}
Also used : File(java.io.File) ClassPathResource(org.datavec.api.util.ClassPathResource) Test(org.junit.Test)

Aggregations

ClassPathResource (org.datavec.api.util.ClassPathResource): 71
Test (org.junit.Test): 63
File (java.io.File): 45
TokenizerFactory (org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory): 28
BasicLineIterator (org.deeplearning4j.text.sentenceiterator.BasicLineIterator): 27
DefaultTokenizerFactory (org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory): 27
VocabWord (org.deeplearning4j.models.word2vec.VocabWord): 23
SentenceIterator (org.deeplearning4j.text.sentenceiterator.SentenceIterator): 23
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 23
CommonPreprocessor (org.deeplearning4j.text.tokenization.tokenizer.preprocessor.CommonPreprocessor): 20
SentenceTransformer (org.deeplearning4j.models.sequencevectors.transformers.impl.SentenceTransformer): 12
AbstractCache (org.deeplearning4j.models.word2vec.wordstore.inmemory.AbstractCache): 11
WordVectors (org.deeplearning4j.models.embeddings.wordvectors.WordVectors): 10
AbstractSequenceIterator (org.deeplearning4j.models.sequencevectors.iterators.AbstractSequenceIterator): 10
ArrayList (java.util.ArrayList): 9
Word2Vec (org.deeplearning4j.models.word2vec.Word2Vec): 8
AggregatingSentenceIterator (org.deeplearning4j.text.sentenceiterator.AggregatingSentenceIterator): 7
FileSentenceIterator (org.deeplearning4j.text.sentenceiterator.FileSentenceIterator): 7
DataSet (org.nd4j.linalg.dataset.DataSet): 7
InputStream (java.io.InputStream): 6