1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.any23.writer;
19
20 import org.apache.any23.extractor.ExtractionContext;
21 import org.eclipse.rdf4j.model.Resource;
22 import org.eclipse.rdf4j.model.IRI;
23 import org.eclipse.rdf4j.model.Value;
24
25 /**
26 * Defines a document based triple handler.
27 */
28 public interface TripleHandler extends AutoCloseable {
29
30 void startDocument(IRI documentIRI) throws TripleHandlerException;
31
32 /**
33 * Informs the handler that a new context has been established. Contexts are not guaranteed to receive any triples,
34 * so they might be closed without any triple.
35 *
36 * @param context
37 * an instantiated {@link org.apache.any23.extractor.ExtractionContext}
38 *
39 * @throws TripleHandlerException
40 * if there is an errr opening the {@link org.apache.any23.extractor.ExtractionContext}
41 */
42 void openContext(ExtractionContext context) throws TripleHandlerException;
43
44 /**
45 * Invoked with a currently open context, notifies the detection of a triple.
46 *
47 * @param s
48 * triple subject, cannot be <code>null</code>.
49 * @param p
50 * triple predicate, cannot be <code>null</code>.
51 * @param o
52 * triple object, cannot be <code>null</code>.
53 * @param g
54 * triple graph, can be <code>null</code>.
55 * @param context
56 * extraction context.
57 *
58 * @throws TripleHandlerException
59 * if there is an error receiving the triple.
60 */
61 void receiveTriple(Resource s, IRI p, Value o, IRI g, ExtractionContext context) throws TripleHandlerException;
62
63 /**
64 * Invoked with a currently open context, notifies the detection of a namespace.
65 *
66 * @param prefix
67 * namespace prefix.
68 * @param uri
69 * namespace <i>IRI</i>.
70 * @param context
71 * namespace context.
72 *
73 * @throws TripleHandlerException
74 * if there is an error receiving the namespace.
75 */
76 void receiveNamespace(String prefix, String uri, ExtractionContext context) throws TripleHandlerException;
77
78 /**
79 * Informs the handler that no more triples will come from a previously opened context. All contexts are guaranteed
80 * to be closed before the final close(). The document context for each document is guaranteed to be closed after
81 * all local contexts of that document.
82 *
83 * @param context
84 * the context to be closed.
85 *
86 * @throws TripleHandlerException
87 * if there is an error closing the {@link org.apache.any23.extractor.ExtractionContext}.
88 */
89 void closeContext(ExtractionContext context) throws TripleHandlerException;
90
91 /**
92 * Informs the handler that the end of the document has been reached.
93 *
94 * @param documentIRI
95 * document IRI.
96 *
97 * @throws TripleHandlerException
98 * if there is an error ending the document.
99 */
100 void endDocument(IRI documentIRI) throws TripleHandlerException;
101
102 /**
103 * Sets the length of the content to be processed.
104 *
105 * @param contentLength
106 * length of the content being processed.
107 */
108 void setContentLength(long contentLength);
109
110 /**
111 * Will be called last and exactly once.
112 *
113 * @throws TripleHandlerException
114 * if there is an error closing the {@link org.apache.any23.writer.TripleHandler} implementation.
115 */
116 void close() throws TripleHandlerException;
117
118 }