Use of org.apache.manifoldcf.core.interfaces.Specification in the Apache ManifoldCF project.
From class AlfrescoConnectorTest, method whenTheClientIsCalledItShouldUseThePreviouslySentLastTransactionId.
/**
 * Checks that seeding queries the client exactly once with the initial ids (0, 0) and exactly
 * once with the transaction / ACL changeset ids carried by the stubbed response (5, 5).
 */
@Test
public void whenTheClientIsCalledItShouldUseThePreviouslySentLastTransactionId() throws Exception {
  long firstTransactionId = 0;
  long lastTransactionId = 5;
  long firstAclChangesetId = 0;
  long lastAclChangesetId = 5;

  // The ids are passed as long values (see the eq(...) verifications below), so the stub must
  // use anyLong(): type-checked Mockito matchers such as anyInt() do not match Long arguments.
  when(client.fetchNodes(Mockito.anyLong(), Mockito.anyLong(), Mockito.any(AlfrescoFilters.class)))
      .thenReturn(new AlfrescoResponse(lastTransactionId, lastAclChangesetId));

  connector.addSeedDocuments(
      mock(SeedingActivity.class), new Specification(), "", 0, BaseRepositoryConnector.JOBMODE_ONCEONLY);

  verify(client, times(1))
      .fetchNodes(eq(firstTransactionId), eq(firstAclChangesetId), Mockito.any(AlfrescoFilters.class));
  verify(client, times(1))
      .fetchNodes(eq(lastTransactionId), eq(lastAclChangesetId), Mockito.any(AlfrescoFilters.class));
}
Use of org.apache.manifoldcf.core.interfaces.Specification in the Apache ManifoldCF project.
From class AlfrescoConnectorTest, method whenADocumentIsReturnedItShouldBeAddedToManifold.
/**
 * Checks that a document contained in the client's response is handed to the seeding activity
 * under {@code TestDocument.uuid}.
 */
@Test
public void whenADocumentIsReturnedItShouldBeAddedToManifold() throws Exception {
  TestDocument testDocument = new TestDocument();

  // Stub with anyLong() rather than anyInt(): the same fetchNodes call is verified with long ids
  // elsewhere in this test class, and type-checked Mockito matchers do not match Long arguments
  // with anyInt().
  when(client.fetchNodes(Mockito.anyLong(), Mockito.anyLong(), Mockito.any(AlfrescoFilters.class)))
      .thenReturn(new AlfrescoResponse(0, 0, "", "", Arrays.<Map<String, Object>>asList(testDocument)));

  SeedingActivity seedingActivity = mock(SeedingActivity.class);
  connector.addSeedDocuments(
      seedingActivity, new Specification(), "", 0, BaseRepositoryConnector.JOBMODE_ONCEONLY);

  verify(seedingActivity).addSeedDocument(eq(TestDocument.uuid));
}
Use of org.apache.manifoldcf.core.interfaces.Specification in the Apache ManifoldCF project.
From class DocumentContentExclusionHSQLDBIT, method addContentExclusionRule.
/**
 * Appends a {@code WebcrawlerConfig.NODE_EXCLUDESCONTENTINDEX} node holding the pattern
 * {@code .*expired.*} to the end of the job's specification, then saves the job.
 *
 * @param job the job whose specification receives the new node
 * @throws ManifoldCFException propagated from the specification or job-manager calls
 */
private void addContentExclusionRule(IJobDescription job) throws ManifoldCFException {
  final Specification specification = job.getSpecification();

  final SpecificationNode exclusionNode =
      new SpecificationNode(WebcrawlerConfig.NODE_EXCLUDESCONTENTINDEX);
  exclusionNode.setValue(".*expired.*\n");

  // Insert at the current child count, i.e. after all existing children.
  final int insertPosition = specification.getChildCount();
  specification.addChild(insertPosition, exclusionNode);

  jobManager.save(job);
}
Use of org.apache.manifoldcf.core.interfaces.Specification in the Apache ManifoldCF project.
From class DocumentContentExclusionHSQLDBIT, method setupContentFilterJob.
/**
 * Creates, configures and saves a job on {@code WEB_CONNECTION} with a single
 * "Null Connection" pipeline stage. The specification receives three nodes, in order:
 * seeds ({@code baseUrl + i} for each {@code i} below {@code MAX_DOC_COUNT}), includes
 * ({@code .*}) and includes-index ({@code .*}).
 *
 * @return the saved job description
 * @throws Exception propagated from the job-manager or specification calls
 */
private IJobDescription setupContentFilterJob() throws Exception {
  // Create a job.
  IJobDescription job = jobManager.createJob();
  job.setDescription("Test Job");
  job.setConnectionName(WEB_CONNECTION);
  job.addPipelineStage(-1, true, "Null Connection", "");
  // These constants are static members, so reference them through the type, not the instance.
  job.setType(IJobDescription.TYPE_SPECIFIED);
  job.setStartMethod(IJobDescription.START_DISABLE);
  job.setHopcountMode(IJobDescription.HOPCOUNT_NEVERDELETE);

  Specification jobSpec = job.getSpecification();

  // One seed URL per index in [0, MAX_DOC_COUNT), newline separated.
  SpecificationNode sn = new SpecificationNode(WebcrawlerConfig.NODE_SEEDS);
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < MAX_DOC_COUNT; i++) {
    sb.append(baseUrl + i + "\n");
  }
  sn.setValue(sb.toString());
  jobSpec.addChild(jobSpec.getChildCount(), sn);

  // Include every URL for crawling.
  sn = new SpecificationNode(WebcrawlerConfig.NODE_INCLUDES);
  sn.setValue(".*\n");
  jobSpec.addChild(jobSpec.getChildCount(), sn);

  // Include every URL for indexing.
  sn = new SpecificationNode(WebcrawlerConfig.NODE_INCLUDESINDEX);
  sn.setValue(".*\n");
  jobSpec.addChild(jobSpec.getChildCount(), sn);

  // Save the job.
  jobManager.save(job);
  return job;
}
Use of org.apache.manifoldcf.core.interfaces.Specification in the Apache ManifoldCF project.
From class ConfluenceConnectorTest, method mockSimpleIngestion.
/**
 * Runs {@code processDocuments} for a single mocked page and checks that the page is fetched
 * once, ingested once with the expected id / version / URI, that a "read document" / "OK"
 * activity is recorded, and that the captured document carries the page's length and its
 * {@code x -> y} metadata entry.
 */
@Test
public void mockSimpleIngestion() throws Exception {
  // --- Fake page returned by the client ---
  Page fakePage = mock(Page.class);
  Date date = new Date();
  DateFormat df = DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.MEDIUM, Locale.ROOT);
  String content = "A";
  String uri = "http://test";
  byte[] documentBytes = content.getBytes(StandardCharsets.UTF_8);
  long size = documentBytes.length;
  when(fakePage.hasContent()).thenReturn(true);
  when(fakePage.getContent()).thenReturn(content);
  when(fakePage.getLength()).thenReturn(size);
  when(fakePage.getLastModifiedDate()).thenReturn(date);
  when(fakePage.getMediaType()).thenReturn("text/plain");
  when(fakePage.getCreatedDate()).thenReturn(date);
  when(fakePage.getWebUrl()).thenReturn(uri);
  Map<String, Object> metadata = new HashMap<String, Object>();
  metadata.put("x", "y");
  when(fakePage.getMetadataAsMap()).thenReturn(metadata);

  // --- Activities mock that accepts every indexability check ---
  IProcessActivity activities = mock(IProcessActivity.class);
  when(activities.checkLengthIndexable(anyLong())).thenReturn(true);
  when(activities.checkMimeTypeIndexable(anyString())).thenReturn(true);
  when(activities.checkDateIndexable((Date) anyObject())).thenReturn(true);
  when(activities.checkURLIndexable(anyString())).thenReturn(true);
  when(activities.checkDocumentNeedsReindexing(anyString(), anyString())).thenReturn(true);

  // --- No previously indexed version for the document id ---
  IExistingVersions statuses = mock(IExistingVersions.class);
  // The formatted date doubles as the document id and as the expected version string below.
  String ID = df.format(date);
  when(statuses.getIndexedVersionString(ID)).thenReturn(null);
  when(client.getPage(Mockito.anyString())).thenReturn(fakePage);

  connector.processDocuments(new String[] { ID }, statuses, new Specification(), activities, 0, true);

  // --- Verify the interactions and capture the ingested document ---
  ArgumentCaptor<RepositoryDocument> rd = ArgumentCaptor.forClass(RepositoryDocument.class);
  verify(client, times(1)).getPage(ID);
  verify(activities, times(1)).ingestDocumentWithException(eq(ID), eq(df.format(date)), eq(uri), rd.capture());
  verify(activities, times(1)).recordActivity(anyLong(), eq("read document"), eq(size), eq(ID), eq("OK"), anyString(), Mockito.isNull(String[].class));

  RepositoryDocument doc = rd.getValue();
  Assert.assertEquals(size, doc.getBinaryLength());
  String[] values = doc.getFieldAsStrings("x");
  // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure
  // messages report the right side as "expected".
  Assert.assertEquals(1, values.length);
  Assert.assertEquals("y", values[0]);
}
Aggregations