1 | /* |
2 | * Copyright 2006-2007 the original author or authors. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | package org.springframework.batch.item.xml; |
18 | |
19 | import java.io.InputStream; |
20 | import java.util.NoSuchElementException; |
21 | |
22 | import javax.xml.namespace.QName; |
23 | import javax.xml.stream.XMLEventReader; |
24 | import javax.xml.stream.XMLInputFactory; |
25 | import javax.xml.stream.XMLStreamException; |
26 | import javax.xml.stream.events.EndElement; |
27 | import javax.xml.stream.events.StartElement; |
28 | import javax.xml.stream.events.XMLEvent; |
29 | |
30 | import org.apache.commons.logging.Log; |
31 | import org.apache.commons.logging.LogFactory; |
32 | import org.springframework.batch.item.NonTransientResourceException; |
33 | import org.springframework.batch.item.file.ResourceAwareItemReaderItemStream; |
34 | import org.springframework.batch.item.support.AbstractItemCountingItemStreamItemReader; |
35 | import org.springframework.batch.item.xml.stax.DefaultFragmentEventReader; |
36 | import org.springframework.batch.item.xml.stax.FragmentEventReader; |
37 | import org.springframework.beans.factory.InitializingBean; |
38 | import org.springframework.core.io.Resource; |
39 | import org.springframework.oxm.Unmarshaller; |
40 | import org.springframework.util.Assert; |
41 | import org.springframework.util.ClassUtils; |
42 | |
43 | /** |
44 | * Item reader for reading XML input based on StAX. |
45 | * |
46 | * It extracts fragments from the input XML document which correspond to records for processing. The fragments are |
47 | * wrapped with StartDocument and EndDocument events so that the fragments can be further processed like standalone XML |
48 | * documents. |
49 | * |
50 | * The implementation is *not* thread-safe. |
51 | * |
52 | * @author Robert Kasanicky |
53 | */ |
54 | public class StaxEventItemReader<T> extends AbstractItemCountingItemStreamItemReader<T> implements |
55 | ResourceAwareItemReaderItemStream<T>, InitializingBean { |
56 | |
57 | private static final Log logger = LogFactory.getLog(StaxEventItemReader.class); |
58 | |
59 | private FragmentEventReader fragmentReader; |
60 | |
61 | private XMLEventReader eventReader; |
62 | |
63 | private Unmarshaller unmarshaller; |
64 | |
65 | private Resource resource; |
66 | |
67 | private InputStream inputStream; |
68 | |
69 | private String fragmentRootElementName; |
70 | |
71 | private boolean noInput; |
72 | |
73 | private boolean strict = true; |
74 | |
75 | private String fragmentRootElementNameSpace; |
76 | |
77 | public StaxEventItemReader() { |
78 | setName(ClassUtils.getShortName(StaxEventItemReader.class)); |
79 | } |
80 | |
81 | /** |
82 | * In strict mode the reader will throw an exception on |
83 | * {@link #open(org.springframework.batch.item.ExecutionContext)} if the input resource does not exist. |
84 | * @param strict false by default |
85 | */ |
86 | public void setStrict(boolean strict) { |
87 | this.strict = strict; |
88 | } |
89 | |
90 | @Override |
91 | public void setResource(Resource resource) { |
92 | this.resource = resource; |
93 | } |
94 | |
95 | /** |
96 | * @param unmarshaller maps xml fragments corresponding to records to objects |
97 | */ |
98 | public void setUnmarshaller(Unmarshaller unmarshaller) { |
99 | this.unmarshaller = unmarshaller; |
100 | } |
101 | |
102 | /** |
103 | * @param fragmentRootElementName name of the root element of the fragment |
104 | */ |
105 | public void setFragmentRootElementName(String fragmentRootElementName) { |
106 | this.fragmentRootElementName = fragmentRootElementName; |
107 | } |
108 | |
109 | /** |
110 | * Ensure that all required dependencies for the ItemReader to run are provided after all properties have been set. |
111 | * |
112 | * @see org.springframework.beans.factory.InitializingBean#afterPropertiesSet() |
113 | * @throws IllegalArgumentException if the Resource, FragmentDeserializer or FragmentRootElementName is null, or if |
114 | * the root element is empty. |
115 | * @throws IllegalStateException if the Resource does not exist. |
116 | */ |
117 | @Override |
118 | public void afterPropertiesSet() throws Exception { |
119 | Assert.notNull(unmarshaller, "The Unmarshaller must not be null."); |
120 | Assert.hasLength(fragmentRootElementName, "The FragmentRootElementName must not be null"); |
121 | if (fragmentRootElementName.contains("{")) { |
122 | fragmentRootElementNameSpace = fragmentRootElementName.replaceAll("\\{(.*)\\}.*", "$1"); |
123 | fragmentRootElementName = fragmentRootElementName.replaceAll("\\{.*\\}(.*)", "$1"); |
124 | } |
125 | } |
126 | |
127 | /** |
128 | * Responsible for moving the cursor before the StartElement of the fragment root. |
129 | * |
130 | * This implementation simply looks for the next corresponding element, it does not care about element nesting. You |
131 | * will need to override this method to correctly handle composite fragments. |
132 | * |
133 | * @return <code>true</code> if next fragment was found, <code>false</code> otherwise. |
134 | * |
135 | * @throws NonTransientResourceException if the cursor could not be moved. This will be treated as fatal and |
136 | * subsequent calls to read will return null. |
137 | */ |
138 | protected boolean moveCursorToNextFragment(XMLEventReader reader) throws NonTransientResourceException { |
139 | try { |
140 | while (true) { |
141 | while (reader.peek() != null && !reader.peek().isStartElement()) { |
142 | reader.nextEvent(); |
143 | } |
144 | if (reader.peek() == null) { |
145 | return false; |
146 | } |
147 | QName startElementName = ((StartElement) reader.peek()).getName(); |
148 | if (startElementName.getLocalPart().equals(fragmentRootElementName)) { |
149 | if (fragmentRootElementNameSpace == null |
150 | || startElementName.getNamespaceURI().equals(fragmentRootElementNameSpace)) { |
151 | return true; |
152 | } |
153 | } |
154 | reader.nextEvent(); |
155 | |
156 | } |
157 | } |
158 | catch (XMLStreamException e) { |
159 | throw new NonTransientResourceException("Error while reading from event reader", e); |
160 | } |
161 | } |
162 | |
163 | @Override |
164 | protected void doClose() throws Exception { |
165 | try { |
166 | if (fragmentReader != null) { |
167 | fragmentReader.close(); |
168 | } |
169 | if (inputStream != null) { |
170 | inputStream.close(); |
171 | } |
172 | } |
173 | finally { |
174 | fragmentReader = null; |
175 | inputStream = null; |
176 | } |
177 | |
178 | } |
179 | |
180 | @Override |
181 | protected void doOpen() throws Exception { |
182 | Assert.notNull(resource, "The Resource must not be null."); |
183 | |
184 | noInput = true; |
185 | if (!resource.exists()) { |
186 | if (strict) { |
187 | throw new IllegalStateException("Input resource must exist (reader is in 'strict' mode)"); |
188 | } |
189 | logger.warn("Input resource does not exist " + resource.getDescription()); |
190 | return; |
191 | } |
192 | if (!resource.isReadable()) { |
193 | if (strict) { |
194 | throw new IllegalStateException("Input resource must be readable (reader is in 'strict' mode)"); |
195 | } |
196 | logger.warn("Input resource is not readable " + resource.getDescription()); |
197 | return; |
198 | } |
199 | |
200 | inputStream = resource.getInputStream(); |
201 | eventReader = XMLInputFactory.newInstance().createXMLEventReader(inputStream); |
202 | fragmentReader = new DefaultFragmentEventReader(eventReader); |
203 | noInput = false; |
204 | |
205 | } |
206 | |
207 | /** |
208 | * Move to next fragment and map it to item. |
209 | */ |
210 | @Override |
211 | protected T doRead() throws Exception { |
212 | |
213 | if (noInput) { |
214 | return null; |
215 | } |
216 | |
217 | T item = null; |
218 | |
219 | boolean success = false; |
220 | try { |
221 | success = moveCursorToNextFragment(fragmentReader); |
222 | } |
223 | catch (NonTransientResourceException e) { |
224 | // Prevent caller from retrying indefinitely since this is fatal |
225 | noInput = true; |
226 | throw e; |
227 | } |
228 | if (success) { |
229 | fragmentReader.markStartFragment(); |
230 | |
231 | try { |
232 | @SuppressWarnings("unchecked") |
233 | T mappedFragment = (T) unmarshaller.unmarshal(StaxUtils.getSource(fragmentReader)); |
234 | item = mappedFragment; |
235 | } |
236 | finally { |
237 | fragmentReader.markFragmentProcessed(); |
238 | } |
239 | } |
240 | |
241 | return item; |
242 | } |
243 | |
244 | /* |
245 | * jumpToItem is overridden because reading in and attempting to bind an entire fragment is unacceptable in a |
246 | * restart scenario, and may cause exceptions to be thrown that were already skipped in previous runs. |
247 | */ |
248 | @Override |
249 | protected void jumpToItem(int itemIndex) throws Exception { |
250 | for (int i = 0; i < itemIndex; i++) { |
251 | try { |
252 | readToStartFragment(); |
253 | readToEndFragment(); |
254 | } catch (NoSuchElementException e) { |
255 | if (itemIndex == (i + 1)) { |
256 | // we can presume a NoSuchElementException on the last item means the EOF was reached on the last run |
257 | return; |
258 | } else { |
259 | // if NoSuchElementException occurs on an item other than the last one, this indicates a problem |
260 | throw e; |
261 | } |
262 | } |
263 | } |
264 | } |
265 | |
266 | /* |
267 | * Read until the first StartElement tag that matches the provided fragmentRootElementName. Because there may be any |
268 | * number of tags in between where the reader is now and the fragment start, this is done in a loop until the |
269 | * element type and name match. |
270 | */ |
271 | private void readToStartFragment() throws XMLStreamException { |
272 | while (true) { |
273 | XMLEvent nextEvent = eventReader.nextEvent(); |
274 | if (nextEvent.isStartElement() |
275 | && ((StartElement) nextEvent).getName().getLocalPart().equals(fragmentRootElementName)) { |
276 | return; |
277 | } |
278 | } |
279 | } |
280 | |
281 | /* |
282 | * Read until the first EndElement tag that matches the provided fragmentRootElementName. Because there may be any |
283 | * number of tags in between where the reader is now and the fragment end tag, this is done in a loop until the |
284 | * element type and name match |
285 | */ |
286 | private void readToEndFragment() throws XMLStreamException { |
287 | while (true) { |
288 | XMLEvent nextEvent = eventReader.nextEvent(); |
289 | if (nextEvent.isEndElement() |
290 | && ((EndElement) nextEvent).getName().getLocalPart().equals(fragmentRootElementName)) { |
291 | return; |
292 | } |
293 | } |
294 | } |
295 | } |