1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.any23.extractor.html;
19
20 import org.apache.any23.AbstractAny23TestBase;
21 import org.apache.any23.extractor.IssueReport;
22 import org.apache.any23.extractor.IssueReport.Issue;
23 import org.apache.any23.extractor.IssueReport.IssueLevel;
24 import org.apache.any23.extractor.ExtractionException;
25 import org.apache.any23.extractor.ExtractorFactory;
26 import org.apache.any23.extractor.SingleDocumentExtraction;
27 import org.apache.any23.extractor.SingleDocumentExtractionReport;
28 import org.apache.any23.rdf.RDFUtils;
29 import org.apache.any23.vocab.SINDICE;
30 import org.apache.any23.writer.RepositoryWriter;
31 import org.junit.After;
32 import org.junit.Assert;
33 import org.junit.Before;
34 import org.eclipse.rdf4j.common.iteration.Iterations;
35 import org.eclipse.rdf4j.model.BNode;
36 import org.eclipse.rdf4j.model.Literal;
37 import org.eclipse.rdf4j.model.Resource;
38 import org.eclipse.rdf4j.model.Statement;
39 import org.eclipse.rdf4j.model.IRI;
40 import org.eclipse.rdf4j.model.Value;
41 import org.eclipse.rdf4j.repository.RepositoryConnection;
42 import org.eclipse.rdf4j.repository.RepositoryException;
43 import org.eclipse.rdf4j.repository.RepositoryResult;
44 import org.eclipse.rdf4j.repository.sail.SailRepository;
45 import org.eclipse.rdf4j.rio.RDFFormat;
46 import org.eclipse.rdf4j.rio.RDFHandlerException;
47 import org.eclipse.rdf4j.rio.RDFParseException;
48 import org.eclipse.rdf4j.rio.Rio;
49 import org.eclipse.rdf4j.sail.Sail;
50 import org.eclipse.rdf4j.sail.memory.MemoryStore;
51 import org.slf4j.Logger;
52 import org.slf4j.LoggerFactory;
53
54 import java.io.ByteArrayOutputStream;
55 import java.io.IOException;
56 import java.io.PrintStream;
57 import java.io.StringWriter;
58 import java.lang.invoke.MethodHandles;
59 import java.nio.charset.StandardCharsets;
60 import java.util.ArrayList;
61 import java.util.Collection;
62 import java.util.Collections;
63 import java.util.List;
64 import java.util.Locale;
65 import java.util.Map;
66
67 /**
68 * Abstract class used to write {@link org.apache.any23.extractor.Extractor} specific test cases.
69 */
70 public abstract class AbstractExtractorTestCase extends AbstractAny23TestBase {
71
/** Logger for this class; the owning class is resolved through {@link MethodHandles#lookup()}. */
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

/**
 * Base test document IRI. Its string value is handed to the document opener in {@link #extract(String)}.
 */
protected static IRI baseIRI = RDFUtils.iri("http://bob.example.com/");

/**
 * Internal connection used to collect extraction results. Opened in {@link #setUp()} and closed in
 * {@link #tearDown()}.
 */
protected RepositoryConnection conn;

/**
 * The latest generated report. Assigned by {@link #extract(String)} and cleared in {@link #tearDown()}.
 */
private SingleDocumentExtractionReport report;

/** In-memory store created in {@link #setUp()} that backs {@link #repository}. */
private Sail store;

/** Repository wrapping {@link #store}; {@link #conn} is obtained from it. */
private SailRepository repository;
92
93 /**
94 * Constructor.
95 */
96 public AbstractExtractorTestCase() {
97 super();
98 }
99
/**
 * Supplies the factory of the extractor under test. The factory is passed to the extraction run in
 * {@link #extract(String)} and its extractor name is used to look up the raised issues.
 *
 * @return the factory of the extractor to be tested.
 */
protected abstract ExtractorFactory<?> getExtractorFactory();
104
105 /**
106 * Test case initialization.
107 *
108 * @throws Exception
109 * if there is an error constructing input objects
110 */
111 @Before
112 public void setUp() throws Exception {
113 super.setUp();
114 store = new MemoryStore();
115 repository = new SailRepository(store);
116 repository.init();
117 conn = repository.getConnection();
118 }
119
120 /**
121 * Test case resources release.
122 *
123 * @throws RepositoryException
124 * if an error is encountered whilst loading content from a storage connection
125 *
126 */
127 @After
128 public void tearDown() throws RepositoryException {
129 try {
130 conn.close();
131 } finally {
132 repository.shutDown();
133 }
134 conn = null;
135 report = null;
136 store = null;
137 repository = null;
138 }
139
140 /**
141 * @return the connection to the memory repository.
142 */
143 protected RepositoryConnection getConnection() {
144 return conn;
145 }
146
147 /**
148 * @return the last generated report.
149 */
150 protected SingleDocumentExtractionReport getReport() {
151 return report;
152 }
153
154 /**
155 * Returns the list of issues raised by a given extractor.
156 *
157 * @param extractorName
158 * name of the extractor.
159 *
160 * @return collection of issues.
161 */
162 protected Collection<IssueReport.Issue> getIssues(String extractorName) {
163 for (Map.Entry<String, Collection<IssueReport.Issue>> issueEntry : report.getExtractorToIssues().entrySet()) {
164 if (issueEntry.getKey().equals(extractorName)) {
165 return issueEntry.getValue();
166 }
167 }
168 return Collections.emptyList();
169 }
170
171 /**
172 * Returns the list of issues raised by the extractor under testing.
173 *
174 * @return collection of issues.
175 */
176 protected Collection<IssueReport.Issue> getIssues() {
177 return getIssues(getExtractorFactory().getExtractorName());
178 }
179
180 /**
181 * Applies the extractor provided by the {@link #getExtractorFactory()} to the specified resource.
182 *
183 * @param resource
184 * resource name.
185 *
186 * @throws org.apache.any23.extractor.ExtractionException
187 * if there is an exception during extraction
188 * @throws IOException
189 * if there is an error processing the input data
190 */
191 // TODO: MimeType detector to null forces the execution of all extractors,
192 // but extraction
193 // tests should be based on mimetype detection.
194 protected void extract(String resource) throws ExtractionException, IOException {
195 SingleDocumentExtraction ex = new SingleDocumentExtraction(
196 new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseIRI.stringValue()),
197 getExtractorFactory(), new RepositoryWriter(conn));
198 ex.setMIMETypeDetector(null);
199 report = ex.run();
200 }
201
202 /**
203 * Performs data extraction over the content of a resource and assert that the extraction was fine.
204 *
205 * @param resource
206 * resource name.
207 * @param assertNoIssues
208 * if <code>true</code>invokes {@link #assertNoIssues()} after the extraction.
209 */
210 protected void assertExtract(String resource, boolean assertNoIssues) {
211 try {
212 extract(resource);
213 if (assertNoIssues)
214 assertNoIssues();
215 } catch (ExtractionException ex) {
216 throw new RuntimeException(ex);
217 } catch (IOException ex) {
218 throw new RuntimeException(ex);
219 }
220 }
221
222 /**
223 * Performs data extraction over the content of a resource and assert that the extraction was fine and raised no
224 * issues.
225 *
226 * @param resource
227 * input resource to test extraction on.
228 */
229 protected void assertExtract(String resource) {
230 assertExtract(resource, true);
231 }
232
233 /**
234 * Asserts that the extracted triples contain the pattern <code>(_ p o)</code>.
235 *
236 * @param p
237 * predicate
238 * @param o
239 * object.
240 *
241 * @throws RepositoryException
242 * if an error is encountered whilst loading content from a storage connection
243 *
244 */
245 protected void assertContains(IRI p, Resource o) throws RepositoryException {
246 assertContains(null, p, o);
247 }
248
249 /**
250 * Asserts that the extracted triples contain the pattern <code>(_ p o)</code>.
251 *
252 * @param p
253 * predicate
254 * @param o
255 * object.
256 *
257 * @throws RepositoryException
258 * if an error is encountered whilst loading content from a storage connection
259 *
260 */
261 protected void assertContains(IRI p, String o) throws RepositoryException {
262 assertContains(null, p, RDFUtils.literal(o));
263 }
264
265 /**
266 * Asserts that the extracted triples contain the pattern <code>(_ p o)</code>.
267 *
268 * @param p
269 * predicate
270 * @param o
271 * object.
272 *
273 * @throws RepositoryException
274 * if an error is encountered whilst loading content from a storage connection
275 *
276 */
277 protected void assertNotContains(IRI p, Resource o) throws RepositoryException {
278 assertNotContains(null, p, o);
279 }
280
281 /**
282 * Asserts that the extracted triples contain the pattern <code>(s p o)</code>.
283 *
284 * @param s
285 * subject.
286 * @param p
287 * predicate.
288 * @param o
289 * object.
290 *
291 * @throws RepositoryException
292 * if an error is encountered whilst loading content from a storage connection
293 *
294 */
295 protected void assertContains(Resource s, IRI p, Value o) throws RepositoryException {
296 Assert.assertTrue(
297 getFailedExtractionMessage() + String.format(Locale.ROOT, "Cannot find triple (%s %s %s)", s, p, o),
298 conn.hasStatement(s, p, o, false));
299 }
300
301 /**
302 * Asserts that the extracted triples contain the pattern <code>(s p o)</code>.
303 *
304 * @param s
305 * subject.
306 * @param p
307 * predicate.
308 * @param o
309 * object.
310 *
311 * @throws RepositoryException
312 * if an error is encountered whilst loading content from a storage connection
313 *
314 */
315 protected void assertNotContains(Resource s, IRI p, String o) throws RepositoryException {
316 Assert.assertFalse(getFailedExtractionMessage(), conn.hasStatement(s, p, RDFUtils.literal(o), false));
317 }
318
319 /**
320 * Asserts that the extracted triples contain the pattern <code>(s p o)</code>.
321 *
322 * @param s
323 * subject.
324 * @param p
325 * predicate.
326 * @param o
327 * object.
328 *
329 * @throws RepositoryException
330 * if an error is encountered whilst loading content from a storage connection
331 *
332 */
333 protected void assertNotContains(Resource s, IRI p, Resource o) throws RepositoryException {
334 Assert.assertFalse(getFailedExtractionMessage(), conn.hasStatement(s, p, o, false));
335 }
336
337 /**
338 * Asserts that the model contains at least a statement.
339 *
340 * @throws RepositoryException
341 * if an error is encountered whilst loading content from a storage connection
342 *
343 */
344 protected void assertModelNotEmpty() throws RepositoryException {
345 Assert.assertFalse("The model is expected to not be empty." + getFailedExtractionMessage(), conn.isEmpty());
346 }
347
348 /**
349 * Asserts that the model doesn't contain the pattern <code>(s p o)</code>
350 *
351 * @param s
352 * subject.
353 * @param p
354 * predicate.
355 * @param o
356 * object.
357 *
358 * @throws RepositoryException
359 * if an error is encountered whilst loading content from a storage connection
360 *
361 */
362 protected void assertNotContains(Resource s, IRI p, Literal o) throws RepositoryException {
363 Assert.assertFalse(getFailedExtractionMessage(), conn.hasStatement(s, p, o, false));
364 }
365
366 /**
367 * Asserts that the model is expected to contains no statements.
368 *
369 * @throws RepositoryException
370 * if an error is encountered whilst loading content from a storage connection
371 *
372 */
373 protected void assertModelEmpty() throws RepositoryException {
374 Assert.assertTrue(getFailedExtractionMessage(), conn.isEmpty());
375 }
376
377 /**
378 * Asserts that the extraction generated no issues.
379 */
380 protected void assertNoIssues() {
381 for (Map.Entry<String, Collection<IssueReport.Issue>> entry : report.getExtractorToIssues().entrySet()) {
382 if (entry.getValue().size() > 0) {
383 log.debug("Unexpected issue for extractor " + entry.getKey() + " : " + entry.getValue());
384 }
385 for (Issue nextIssue : entry.getValue()) {
386 if (nextIssue.getLevel() == IssueLevel.ERROR || nextIssue.getLevel() == IssueLevel.FATAL) {
387 Assert.fail("Unexpected issue for extractor " + entry.getKey() + " : " + entry.getValue());
388 }
389 }
390 }
391 }
392
393 /**
394 * Asserts that an issue has been produced by the processed {@link org.apache.any23.extractor.Extractor}.
395 *
396 * @param level
397 * expected issue level
398 * @param issueRegex
399 * regex matching the expected human readable issue message.
400 */
401 protected void assertIssue(IssueReport.IssueLevel level, String issueRegex) {
402 final Collection<IssueReport.Issue> issues = getIssues(getExtractorFactory().getExtractorName());
403 boolean found = false;
404 for (IssueReport.Issue issue : issues) {
405 if (issue.getLevel() == level && issue.getMessage().matches(issueRegex)) {
406 found = true;
407 break;
408 }
409 }
410 Assert.assertTrue(String.format(Locale.ROOT, "Cannot find issue with level %s matching expression '%s'", level,
411 issueRegex), found);
412 }
413
414 /**
415 * Verifies that the current model contains all the given statements.
416 *
417 * @param statements
418 * list of statements to be verified.
419 *
420 * @throws RepositoryException
421 * if an error is encountered whilst loading content from a storage connection
422 *
423 */
424 public void assertContainsModel(Statement[] statements) throws RepositoryException {
425 for (Statement statement : statements) {
426 assertContains(statement);
427 }
428 }
429
430 /**
431 * Verifies that the current model contains all the statements declared in the specified <code>modelFile</code>.
432 *
433 * @param modelResource
434 * the resource containing the model.
435 *
436 * @throws RDFHandlerException
437 * if there is an error within the {@link org.eclipse.rdf4j.rio.RDFHandler}
438 * @throws IOException
439 * if there is an error processing the input data
440 * @throws RDFParseException
441 * if there is an exception parsing the RDF stream
442 * @throws RepositoryException
443 * if an error is encountered whilst loading content from a storage connection
444 *
445 */
446 public void assertContainsModel(String modelResource)
447 throws RDFHandlerException, IOException, RDFParseException, RepositoryException {
448 getConnection().remove(null, SINDICE.getInstance().date, (Value) null, (Resource) null);
449 getConnection().remove(null, SINDICE.getInstance().size, (Value) null, (Resource) null);
450 assertContainsModel(RDFUtils.parseRDF(modelResource));
451 }
452
453 /**
454 * Asserts that the given pattern <code>(s p o)</code> satisfies the expected number of statements.
455 *
456 * @param s
457 * subject.
458 * @param p
459 * predicate.
460 * @param o
461 * object.
462 * @param expected
463 * expected matches.
464 *
465 * @throws RepositoryException
466 * if an error is encountered whilst loading content from a storage connection
467 *
468 */
469 protected void assertStatementsSize(Resource s, IRI p, Value o, int expected)
470 throws RDFHandlerException, RepositoryException {
471 int statementsSize = getStatementsSize(s, p, o);
472 if (statementsSize != expected) {
473 final ByteArrayOutputStream baos = new ByteArrayOutputStream();
474 PrintStream ps = new PrintStream(baos, true, StandardCharsets.UTF_8);
475 getConnection().exportStatements(s, p, o, true, Rio.createWriter(RDFFormat.NQUADS, ps));
476 }
477
478 Assert.assertEquals("Unexpected number of matching statements.", expected, statementsSize);
479 }
480
481 /**
482 * Asserts that the given pattern <code>(_ p o)</code> satisfies the expected number of statements.
483 *
484 * @param p
485 * predicate.
486 * @param o
487 * object.
488 * @param expected
489 * expected matches.
490 *
491 * @throws RepositoryException
492 * if an error is encountered whilst loading content from a storage connection
493 *
494 */
495 protected void assertStatementsSize(IRI p, Value o, int expected) throws RDFHandlerException, RepositoryException {
496 assertStatementsSize(null, p, o, expected);
497 }
498
499 /**
500 * Asserts that the given pattern <code>(_ p o)</code> satisfies the expected number of statements.
501 *
502 * @param p
503 * predicate.
504 * @param o
505 * object.
506 * @param expected
507 * expected matches.
508 *
509 * @throws RepositoryException
510 * if an error is encountered whilst loading content from a storage connection
511 *
512 */
513 protected void assertStatementsSize(IRI p, String o, int expected) throws RDFHandlerException, RepositoryException {
514 assertStatementsSize(p, o == null ? null : RDFUtils.literal(o), expected);
515 }
516
517 /**
518 * Asserts that the given pattern <code>(s p _)</code> is not present.
519 *
520 * @param s
521 * subject.
522 * @param p
523 * predicate.
524 *
525 * @throws RepositoryException
526 * if an error is encountered whilst loading content from a storage connection
527 *
528 */
529 protected void assertNotFound(Resource s, IRI p) throws RepositoryException {
530 RepositoryResult<Statement> statements = conn.getStatements(s, p, null, true);
531 try {
532 Assert.assertFalse("Expected no statements.", statements.hasNext());
533 } finally {
534 statements.close();
535 }
536 }
537
538 /**
539 * Returns the blank subject matching the pattern <code>(_:b p o)</code>, it is expected to exists and be just one.
540 *
541 * @param p
542 * predicate.
543 * @param o
544 * object.
545 *
546 * @return the matching blank subject.
547 *
548 * @throws RepositoryException
549 * if an error is encountered whilst loading content from a storage connection
550 *
551 */
552 protected Resource findExactlyOneBlankSubject(IRI p, Value o) throws RepositoryException {
553 RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
554 try {
555 Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
556 Statement stmt = it.next();
557 Resource result = stmt.getSubject();
558 Assert.assertTrue(getFailedExtractionMessage(), result instanceof BNode);
559 Assert.assertFalse(getFailedExtractionMessage(), it.hasNext());
560 return result;
561 } finally {
562 it.close();
563 }
564 }
565
566 /**
567 * Returns the object matching the pattern <code>(s p o)</code>, it is expected to exists and be just one.
568 *
569 * @param s
570 * subject.
571 * @param p
572 * predicate.
573 *
574 * @return the matching object.
575 *
576 * @throws RepositoryException
577 * if an error is encountered whilst loading content from a storage connection
578 *
579 */
580 protected Value findExactlyOneObject(Resource s, IRI p) throws RepositoryException {
581 RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
582 try {
583 Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
584 return it.next().getObject();
585 } finally {
586 it.close();
587 }
588 }
589
590 /**
591 * Returns all the subjects matching the pattern <code>(s? p o)</code>.
592 *
593 * @param p
594 * predicate.
595 * @param o
596 * object.
597 *
598 * @return list of matching subjects.
599 *
600 * @throws RepositoryException
601 * if an error is encountered whilst loading content from a storage connection
602 *
603 */
604 protected List<Resource> findSubjects(IRI p, Value o) throws RepositoryException {
605 RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
606 List<Resource> subjects = new ArrayList<Resource>();
607 try {
608 Statement statement;
609 while (it.hasNext()) {
610 statement = it.next();
611 subjects.add(statement.getSubject());
612 }
613 } finally {
614 it.close();
615 }
616 return subjects;
617 }
618
619 /**
620 * Returns all the objects matching the pattern <code>(s p _)</code>.
621 *
622 * @param s
623 * predicate.
624 * @param p
625 * predicate.
626 *
627 * @return list of matching objects.
628 *
629 * @throws RepositoryException
630 * if an error is encountered whilst loading content from a storage connection
631 *
632 */
633 protected List<Value> findObjects(Resource s, IRI p) throws RepositoryException {
634 RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
635 List<Value> objects = new ArrayList<Value>();
636 try {
637 Statement statement;
638 while (it.hasNext()) {
639 statement = it.next();
640 objects.add(statement.getObject());
641 }
642 } finally {
643 it.close();
644 }
645 return objects;
646 }
647
648 /**
649 * Finds the object matching the pattern <code>(s p _)</code>, asserts to find exactly one result.
650 *
651 * @param s
652 * subject.
653 * @param p
654 * predicate
655 *
656 * @return matching object.
657 *
658 * @throws org.eclipse.rdf4j.repository.RepositoryException
659 * if an error is encountered whilst loading content from a storage connection
660 */
661 protected Value findObject(Resource s, IRI p) throws RepositoryException {
662 RepositoryResult<Statement> statements = conn.getStatements(s, p, null, true);
663 try {
664 Assert.assertTrue("Expected at least a statement.", statements.hasNext());
665 return (statements.next().getObject());
666 } finally {
667 statements.close();
668 }
669 }
670
671 /**
672 * Finds the resource object matching the pattern <code>(s p _)</code>, asserts to find exactly one result.
673 *
674 * @param s
675 * subject.
676 * @param p
677 * predicate.
678 *
679 * @return matching object.
680 *
681 * @throws RepositoryException
682 * if an error is encountered whilst loading content from a storage connection
683 *
684 */
685 protected Resource findObjectAsResource(Resource s, IRI p) throws RepositoryException {
686 final Value v = findObject(s, p);
687 try {
688 return (Resource) v;
689 } catch (ClassCastException cce) {
690 Assert.fail("Expected resource object, found: " + v.getClass().getSimpleName());
691 throw new IllegalStateException();
692 }
693 }
694
695 /**
696 * Finds the literal object matching the pattern <code>(s p _)</code>, asserts to find exactly one result.
697 *
698 * @param s
699 * subject.
700 * @param p
701 * predicate.
702 *
703 * @return matching object.
704 *
705 * @throws RepositoryException
706 * if an error is encountered whilst loading content from a storage connection
707 *
708 */
709 protected String findObjectAsLiteral(Resource s, IRI p) throws RepositoryException {
710 return findObject(s, p).stringValue();
711 }
712
713 /**
714 * Dumps the extracted model in <i>Turtle</i> format.
715 *
716 * @return a string containing the model in Turtle.
717 *
718 * @throws RepositoryException
719 * if an error is encountered whilst loading content from a storage connection
720 *
721 */
722 protected String dumpModelToTurtle() throws RepositoryException {
723 StringWriter w = new StringWriter();
724 try {
725 conn.export(Rio.createWriter(RDFFormat.TURTLE, w));
726 return w.toString();
727 } catch (RDFHandlerException ex) {
728 throw new RuntimeException(ex);
729 }
730 }
731
732 /**
733 * Dumps the extracted model in <i>NQuads</i> format.
734 *
735 * @return a string containing the model in NQuads.
736 *
737 * @throws RepositoryException
738 * if an error is encountered whilst loading content from a storage connection
739 *
740 */
741 protected String dumpModelToNQuads() throws RepositoryException {
742 StringWriter w = new StringWriter();
743 try {
744 conn.export(Rio.createWriter(RDFFormat.NQUADS, w));
745 return w.toString();
746 } catch (RDFHandlerException ex) {
747 throw new RuntimeException(ex);
748 }
749 }
750
751 /**
752 * Dumps the extracted model in <i>RDFXML</i> format.
753 *
754 * @return a string containing the model in RDFXML.
755 *
756 * @throws RepositoryException
757 * if an error is encountered whilst loading content from a storage connection
758 *
759 */
760 protected String dumpModelToRDFXML() throws RepositoryException {
761 StringWriter w = new StringWriter();
762 try {
763 conn.export(Rio.createWriter(RDFFormat.RDFXML, w));
764 return w.toString();
765 } catch (RDFHandlerException ex) {
766 throw new RuntimeException(ex);
767 }
768 }
769
770 /**
771 * Dumps the list of statements contained in the extracted model.
772 *
773 * @return list of extracted statements.
774 *
775 * @throws RepositoryException
776 * if an error is encountered whilst loading content from a storage connection
777 *
778 */
779 protected List<Statement> dumpAsListOfStatements() throws RepositoryException {
780 return Iterations.asList(conn.getStatements(null, null, null, false));
781 }
782
783 /**
784 * @return string containing human readable statements.
785 *
786 * @throws RepositoryException
787 * if an error is encountered whilst loading content from a storage connection
788 *
789 */
790 protected String dumpHumanReadableTriples() throws RepositoryException {
791 StringBuilder sb = new StringBuilder();
792 RepositoryResult<Statement> result = conn.getStatements(null, null, null, false);
793 while (result.hasNext()) {
794 Statement statement = result.next();
795 sb.append(String.format(Locale.ROOT, "%s %s %s %s\n", statement.getSubject(), statement.getPredicate(),
796 statement.getObject(), statement.getContext()));
797
798 }
799 return sb.toString();
800 }
801
802 /**
803 * Checks that a statement is contained in the extracted model. If the statement declares bnodes, they are replaced
804 * with <code>_</code> patterns.
805 *
806 * @param statement
807 * an RDF {@link org.eclipse.rdf4j.model.Statement} implementation
808 *
809 * @throws RepositoryException
810 * if an error is encountered whilst loading content from a storage connection
811 *
812 */
813 // TODO: bnode check is too weak, introduce graph omomorphism check.
814 protected void assertContains(Statement statement) throws RepositoryException {
815 Assert.assertTrue("Cannot find statement " + statement + " in model.",
816 conn.hasStatement(statement.getSubject() instanceof BNode ? null : statement.getSubject(),
817 statement.getPredicate(), statement.getObject() instanceof BNode ? null : statement.getObject(),
818 false));
819 }
820
821 /**
822 * Assert that the model contains the statement <code>(s p l)</code> where <code>l</code> is a literal.
823 *
824 * @param s
825 * subject.
826 * @param p
827 * predicate.
828 * @param l
829 * literal content.
830 *
831 * @throws RepositoryException
832 * if an error is encountered whilst loading content from a storage connection
833 *
834 */
835 protected void assertContains(Resource s, IRI p, String l) throws RepositoryException {
836 assertContains(s, p, RDFUtils.literal(l));
837 }
838
839 /**
840 * Assert that the model contains the statement <code>(s p l)</code> where <code>l</code> is a language literal.
841 *
842 * @param s
843 * subject.
844 * @param p
845 * predicate.
846 * @param l
847 * literal content.
848 * @param lang
849 * literal language.
850 *
851 * @throws RepositoryException
852 * if an error is encountered whilst loading content from a storage connection
853 *
854 */
855 protected void assertContains(Resource s, IRI p, String l, String lang) throws RepositoryException {
856 assertContains(s, p, RDFUtils.literal(l, lang));
857 }
858
859 /**
860 * Returns all statements matching the pattern <code>(s p o)</code>.
861 *
862 * @param s
863 * subject.
864 * @param p
865 * predicate.
866 * @param o
867 * object.
868 *
869 * @return list of statements.
870 *
871 * @throws RepositoryException
872 * if an error is encountered whilst loading content from a storage connection
873 *
874 */
875 protected RepositoryResult<Statement> getStatements(Resource s, IRI p, Value o) throws RepositoryException {
876 return conn.getStatements(s, p, o, false);
877 }
878
879 /**
880 * Counts all statements matching the pattern <code>(s p o)</code>.
881 *
882 * @param s
883 * subject.
884 * @param p
885 * predicate.
886 * @param o
887 * object.
888 *
889 * @return number of matches.
890 *
891 * @throws RepositoryException
892 * if an error is encountered whilst loading content from a storage connection
893 *
894 */
895 protected int getStatementsSize(Resource s, IRI p, Value o) throws RepositoryException {
896 RepositoryResult<Statement> result = getStatements(s, p, o);
897 int count = 0;
898 try {
899 while (result.hasNext()) {
900 result.next();
901 count++;
902 }
903 } finally {
904 result.close();
905 }
906 return count;
907 }
908
909 private String getFailedExtractionMessage() throws RepositoryException {
910 return "Assertion failed! Extracted triples:\n" + dumpModelToNQuads();
911 }
912
913 }