1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.any23.extractor;
19
20 import org.apache.any23.configuration.DefaultConfiguration;
21 import org.apache.any23.extractor.html.HTMLMetaExtractorFactory;
22 import org.apache.any23.extractor.rdfa.RDFa11ExtractorFactory;
23 import org.apache.any23.extractor.rdfa.RDFaExtractorFactory;
24
25 import java.util.ArrayList;
26 import java.util.Collections;
27 import java.util.List;
28
29 /**
30 * Singleton class acting as a register for all the various {@link Extractor}.
31 */
32 @SuppressWarnings("rawtypes")
33 public class ExtractorRegistryImpl extends
34 org.eclipse.rdf4j.common.lang.service.ServiceRegistry<String, ExtractorFactory> implements ExtractorRegistry {
35
36 /**
37 * The instance.
38 */
39 private static ExtractorRegistry instance = null;
40
41 /**
42 * Public constructor for ExtractorRegistryImpl. Should normally call getInstance.
43 */
44 public ExtractorRegistryImpl() {
45 super(ExtractorFactory.class);
46 }
47
48 /**
49 * @return returns the {@link ExtractorRegistry} instance.
50 */
51 public static ExtractorRegistry getInstance() {
52 // Thread-safe
53 synchronized (ExtractorRegistry.class) {
54 final DefaultConfiguration conf = DefaultConfiguration.singleton();
55 if (instance == null) {
56 instance = new ExtractorRegistryImpl();
57
58 if (conf.getFlagProperty("any23.extraction.rdfa.programmatic")) {
59 instance.unregister(RDFaExtractorFactory.NAME);
60 // FIXME: Unregister RDFaExtractor if flag is not set
61 // instance.register(RDFa11Extractor.factory);
62 } else {
63 instance.unregister(RDFa11ExtractorFactory.NAME);
64 // FIXME: Unregister RDFaExtractor if flag is set
65 // instance.register(RDFaExtractor.factory);
66 }
67 if (!conf.getFlagProperty("any23.extraction.head.meta")) {
68 instance.unregister(HTMLMetaExtractorFactory.NAME);
69 // FIXME: Unregister HTMLMetaExtractor if this flag is not set
70 // instance.register(HTMLMetaExtractor.factory);
71 }
72 }
73 }
74 return instance;
75 }
76
77 /**
78 * Registers an {@link ExtractorFactory}.
79 *
80 * @param factory
81 * the {@link org.apache.any23.extractor.ExtractorFactory} to register
82 *
83 * @throws IllegalArgumentException
84 * if trying to register a {@link ExtractorFactory} with a that already exists in the registry.
85 */
86 @Override
87 public void register(ExtractorFactory<?> factory) {
88 this.add(factory);
89 }
90
91 /**
92 * Unregisters the {@link ExtractorFactory} with the given name.
93 *
94 * @param name
95 * The name of the ExtractorFactory to unregister.
96 */
97 @Override
98 public void unregister(String name) {
99 if (this.has(name)) {
100 this.remove(this.get(name).get());
101 }
102 }
103
104 /**
105 *
106 * Retrieves a {@link ExtractorFactory} given its name
107 *
108 * @param name
109 * of the desired factory
110 *
111 * @return the {@link ExtractorFactory} associated to the provided name
112 *
113 * @throws IllegalArgumentException
114 * if there is not a {@link ExtractorFactory} associated to the provided name.
115 */
116 @Override
117 public ExtractorFactory<?> getFactory(final String name) {
118 return this.get(name).orElseThrow(() -> new IllegalArgumentException("Unregistered extractor name: " + name));
119 }
120
121 /**
122 * @return an {@link ExtractorGroup} with all the registered {@link Extractor}.
123 */
124 @Override
125 public ExtractorGroup getExtractorGroup() {
126 return getExtractorGroup(getAllNames());
127 }
128
129 /**
130 * Returns an {@link ExtractorGroup} containing the {@link ExtractorFactory} mathing the names provided as input.
131 *
132 * @param names
133 * a {@link java.util.List} containing the names of the desired {@link ExtractorFactory}.
134 *
135 * @return the extraction group.
136 */
137 @Override
138 public ExtractorGroup getExtractorGroup(List<String> names) {
139 List<ExtractorFactory<?>> members = new ArrayList<>(names.size());
140 for (String name : names) {
141 members.add(getFactory(name));
142 }
143 return new ExtractorGroup(members);
144 }
145
146 /**
147 *
148 * @param name
149 * of the {@link ExtractorFactory}
150 *
151 * @return <code>true</code> if is there a {@link ExtractorFactory} associated to the provided name.
152 */
153 @Override
154 public boolean isRegisteredName(String name) {
155 return this.has(name);
156 }
157
158 /**
159 * Returns the names of all registered extractors, sorted alphabetically.
160 */
161 @Override
162 public List<String> getAllNames() {
163 List<String> result = new ArrayList<>(this.getKeys());
164 Collections.sort(result);
165 return result;
166 }
167
168 @Override
169 protected String getKey(ExtractorFactory service) {
170 return service.getExtractorName();
171 }
172
173 }