| 1 | |
|
| 2 | |
|
| 3 | |
|
| 4 | |
|
| 5 | |
|
| 6 | |
|
| 7 | |
|
| 8 | |
|
| 9 | |
|
| 10 | |
|
| 11 | |
|
| 12 | |
|
| 13 | |
|
| 14 | |
|
| 15 | |
|
| 16 | |
|
| 17 | |
|
| 18 | |
package org.apache.any23.extractor.rdf; |
| 19 | |
|
| 20 | |
import org.apache.any23.extractor.ErrorReporter; |
| 21 | |
import org.apache.any23.extractor.ExtractionContext; |
| 22 | |
import org.apache.any23.extractor.ExtractionResult; |
| 23 | |
import org.apache.any23.io.nquads.NQuadsParser; |
| 24 | |
import org.apache.any23.rdf.Any23ValueFactoryWrapper; |
| 25 | |
import org.openrdf.model.impl.ValueFactoryImpl; |
| 26 | |
import org.openrdf.rio.ParseErrorListener; |
| 27 | |
import org.openrdf.rio.RDFHandlerException; |
| 28 | |
import org.openrdf.rio.RDFParseException; |
| 29 | |
import org.openrdf.rio.RDFParser; |
| 30 | |
import org.openrdf.rio.ntriples.NTriplesParser; |
| 31 | |
import org.openrdf.rio.rdfxml.RDFXMLParser; |
| 32 | |
import org.openrdf.rio.trix.TriXParser; |
| 33 | |
import org.openrdf.rio.turtle.TurtleParser; |
| 34 | |
import org.slf4j.Logger; |
| 35 | |
import org.slf4j.LoggerFactory; |
| 36 | |
|
| 37 | |
import java.io.IOException; |
| 38 | |
import java.io.InputStream; |
| 39 | |
import java.io.Reader; |
| 40 | |
|
| 41 | |
|
| 42 | |
|
| 43 | |
|
| 44 | |
|
| 45 | |
|
| 46 | |
|
| 47 | 0 | public class RDFParserFactory { |
| 48 | |
|
| 49 | 0 | private static final Logger logger = LoggerFactory.getLogger(RDFParserFactory.class); |
| 50 | |
|
| 51 | |
private static RDFParserFactory instance; |
| 52 | |
|
| 53 | |
public static RDFParserFactory getInstance() { |
| 54 | 0 | if(instance == null) { |
| 55 | 0 | instance = new RDFParserFactory(); |
| 56 | |
} |
| 57 | 0 | return instance; |
| 58 | |
} |
| 59 | |
|
| 60 | |
|
| 61 | |
|
| 62 | |
|
| 63 | |
|
| 64 | |
|
| 65 | |
|
| 66 | |
|
| 67 | |
|
| 68 | |
|
| 69 | |
public TurtleParser getTurtleParserInstance( |
| 70 | |
final boolean verifyDataType, |
| 71 | |
final boolean stopAtFirstError, |
| 72 | |
final ExtractionContext extractionContext, |
| 73 | |
final ExtractionResult extractionResult |
| 74 | |
) { |
| 75 | 0 | if (extractionResult == null) { |
| 76 | 0 | throw new NullPointerException("extractionResult cannot be null."); |
| 77 | |
} |
| 78 | 0 | final TurtleParser parser = new ExtendedTurtleParser(); |
| 79 | 0 | configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult); |
| 80 | 0 | return parser; |
| 81 | |
} |
| 82 | |
|
| 83 | |
|
| 84 | |
|
| 85 | |
|
| 86 | |
|
| 87 | |
|
| 88 | |
|
| 89 | |
|
| 90 | |
|
| 91 | |
|
| 92 | |
public RDFXMLParser getRDFXMLParser( |
| 93 | |
final boolean verifyDataType, |
| 94 | |
final boolean stopAtFirstError, |
| 95 | |
final ExtractionContext extractionContext, |
| 96 | |
final ExtractionResult extractionResult |
| 97 | |
) { |
| 98 | 0 | final RDFXMLParser parser = new RDFXMLParser(); |
| 99 | 0 | configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult); |
| 100 | 0 | return parser; |
| 101 | |
} |
| 102 | |
|
| 103 | |
|
| 104 | |
|
| 105 | |
|
| 106 | |
|
| 107 | |
|
| 108 | |
|
| 109 | |
|
| 110 | |
|
| 111 | |
|
| 112 | |
public NTriplesParser getNTriplesParser( |
| 113 | |
final boolean verifyDataType, |
| 114 | |
final boolean stopAtFirstError, |
| 115 | |
final ExtractionContext extractionContext, |
| 116 | |
final ExtractionResult extractionResult |
| 117 | |
) { |
| 118 | 0 | final NTriplesParser parser = new NTriplesParser(); |
| 119 | 0 | configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult); |
| 120 | 0 | return parser; |
| 121 | |
} |
| 122 | |
|
| 123 | |
|
| 124 | |
|
| 125 | |
|
| 126 | |
|
| 127 | |
|
| 128 | |
|
| 129 | |
|
| 130 | |
|
| 131 | |
|
| 132 | |
public NQuadsParser getNQuadsParser( |
| 133 | |
final boolean verifyDataType, |
| 134 | |
final boolean stopAtFirstError, |
| 135 | |
final ExtractionContext extractionContext, |
| 136 | |
final ExtractionResult extractionResult |
| 137 | |
) { |
| 138 | 0 | final NQuadsParser parser = new NQuadsParser(); |
| 139 | 0 | configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult); |
| 140 | 0 | return parser; |
| 141 | |
} |
| 142 | |
|
| 143 | |
|
| 144 | |
|
| 145 | |
|
| 146 | |
|
| 147 | |
|
| 148 | |
|
| 149 | |
|
| 150 | |
|
| 151 | |
|
| 152 | |
public TriXParser getTriXParser( |
| 153 | |
final boolean verifyDataType, |
| 154 | |
final boolean stopAtFirstError, |
| 155 | |
final ExtractionContext extractionContext, |
| 156 | |
final ExtractionResult extractionResult |
| 157 | |
) { |
| 158 | 0 | final TriXParser parser = new TriXParser(); |
| 159 | 0 | configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult); |
| 160 | 0 | return parser; |
| 161 | |
} |
| 162 | |
|
| 163 | |
|
| 164 | |
|
| 165 | |
|
| 166 | |
|
| 167 | |
|
| 168 | |
|
| 169 | |
|
| 170 | |
|
| 171 | |
|
| 172 | |
|
| 173 | |
|
| 174 | |
private void configureParser( |
| 175 | |
final RDFParser parser, |
| 176 | |
final boolean verifyDataType, |
| 177 | |
final boolean stopAtFirstError, |
| 178 | |
final ExtractionContext extractionContext, |
| 179 | |
final ExtractionResult extractionResult |
| 180 | |
) { |
| 181 | 0 | parser.setDatatypeHandling( |
| 182 | |
verifyDataType ? RDFParser.DatatypeHandling.VERIFY : RDFParser.DatatypeHandling.IGNORE |
| 183 | |
); |
| 184 | 0 | parser.setStopAtFirstError(stopAtFirstError); |
| 185 | 0 | parser.setParseErrorListener( new InternalParseErrorListener(extractionResult) ); |
| 186 | 0 | parser.setValueFactory( |
| 187 | |
new Any23ValueFactoryWrapper( |
| 188 | |
ValueFactoryImpl.getInstance(), |
| 189 | |
extractionResult, |
| 190 | |
extractionContext.getDefaultLanguage() |
| 191 | |
) |
| 192 | |
); |
| 193 | 0 | parser.setRDFHandler(new RDFHandlerAdapter(extractionResult)); |
| 194 | 0 | } |
| 195 | |
|
| 196 | |
|
| 197 | |
|
| 198 | |
|
| 199 | |
private class InternalParseErrorListener implements ParseErrorListener { |
| 200 | |
|
| 201 | |
private final ErrorReporter extractionResult; |
| 202 | |
|
| 203 | 0 | public InternalParseErrorListener(ErrorReporter er) { |
| 204 | 0 | extractionResult = er; |
| 205 | 0 | } |
| 206 | |
|
| 207 | |
public void warning(String msg, int lineNo, int colNo) { |
| 208 | |
try { |
| 209 | 0 | extractionResult.notifyError(ExtractionResult.ErrorLevel.WARN, msg, lineNo, colNo); |
| 210 | 0 | } catch (Exception e) { |
| 211 | 0 | notifyExceptionInNotification(e); |
| 212 | 0 | } |
| 213 | 0 | } |
| 214 | |
|
| 215 | |
public void error(String msg, int lineNo, int colNo) { |
| 216 | |
try { |
| 217 | 0 | extractionResult.notifyError(ExtractionResult.ErrorLevel.ERROR, msg, lineNo, colNo); |
| 218 | 0 | } catch (Exception e) { |
| 219 | 0 | notifyExceptionInNotification(e); |
| 220 | 0 | } |
| 221 | 0 | } |
| 222 | |
|
| 223 | |
public void fatalError(String msg, int lineNo, int colNo) { |
| 224 | |
try { |
| 225 | 0 | extractionResult.notifyError(ExtractionResult.ErrorLevel.FATAL, msg, lineNo, colNo); |
| 226 | 0 | } catch (Exception e) { |
| 227 | 0 | notifyExceptionInNotification(e); |
| 228 | 0 | } |
| 229 | 0 | } |
| 230 | |
|
| 231 | |
private void notifyExceptionInNotification(Exception e) { |
| 232 | 0 | if (logger != null) { |
| 233 | 0 | logger.error("An exception occurred while notifying an error.", e); |
| 234 | |
} |
| 235 | 0 | } |
| 236 | |
} |
| 237 | |
|
| 238 | |
|
| 239 | |
|
| 240 | |
|
| 241 | |
|
| 242 | 0 | private class ExtendedTurtleParser extends TurtleParser { |
| 243 | |
@Override |
| 244 | |
public void parse(Reader reader, String baseURI) |
| 245 | |
throws IOException, RDFParseException, RDFHandlerException { |
| 246 | 0 | setNamespace("", baseURI); |
| 247 | 0 | super.parse(reader, baseURI); |
| 248 | 0 | } |
| 249 | |
|
| 250 | |
@Override |
| 251 | |
public void parse(InputStream in, String baseURI) |
| 252 | |
throws IOException, RDFParseException, RDFHandlerException { |
| 253 | 0 | setNamespace("", baseURI); |
| 254 | 0 | super.parse(in, baseURI); |
| 255 | 0 | } |
| 256 | |
} |
| 257 | |
} |