1 | /* |
2 | * Copyright 2006-2007 the original author or authors. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | package org.springframework.batch.item.xml; |
18 | |
19 | import java.io.InputStream; |
20 | |
21 | import javax.xml.namespace.QName; |
22 | import javax.xml.stream.XMLEventReader; |
23 | import javax.xml.stream.XMLInputFactory; |
24 | import javax.xml.stream.XMLStreamException; |
25 | import javax.xml.stream.events.EndElement; |
26 | import javax.xml.stream.events.StartElement; |
27 | import javax.xml.stream.events.XMLEvent; |
28 | |
29 | import org.apache.commons.logging.Log; |
30 | import org.apache.commons.logging.LogFactory; |
31 | import org.springframework.batch.item.NonTransientResourceException; |
32 | import org.springframework.batch.item.file.ResourceAwareItemReaderItemStream; |
33 | import org.springframework.batch.item.support.AbstractItemCountingItemStreamItemReader; |
34 | import org.springframework.batch.item.xml.stax.DefaultFragmentEventReader; |
35 | import org.springframework.batch.item.xml.stax.FragmentEventReader; |
36 | import org.springframework.beans.factory.InitializingBean; |
37 | import org.springframework.core.io.Resource; |
38 | import org.springframework.oxm.Unmarshaller; |
39 | import org.springframework.util.Assert; |
40 | import org.springframework.util.ClassUtils; |
41 | |
42 | /** |
43 | * Item reader for reading XML input based on StAX. |
44 | * |
45 | * It extracts fragments from the input XML document which correspond to records for processing. The fragments are |
46 | * wrapped with StartDocument and EndDocument events so that the fragments can be further processed like standalone XML |
47 | * documents. |
48 | * |
49 | * The implementation is *not* thread-safe. |
50 | * |
51 | * @author Robert Kasanicky |
52 | */ |
53 | public class StaxEventItemReader<T> extends AbstractItemCountingItemStreamItemReader<T> implements |
54 | ResourceAwareItemReaderItemStream<T>, InitializingBean { |
55 | |
56 | private static final Log logger = LogFactory.getLog(StaxEventItemReader.class); |
57 | |
58 | private FragmentEventReader fragmentReader; |
59 | |
60 | private XMLEventReader eventReader; |
61 | |
62 | private Unmarshaller unmarshaller; |
63 | |
64 | private Resource resource; |
65 | |
66 | private InputStream inputStream; |
67 | |
68 | private String fragmentRootElementName; |
69 | |
70 | private boolean noInput; |
71 | |
72 | private boolean strict = true; |
73 | |
74 | private String fragmentRootElementNameSpace; |
75 | |
76 | public StaxEventItemReader() { |
77 | setName(ClassUtils.getShortName(StaxEventItemReader.class)); |
78 | } |
79 | |
80 | /** |
81 | * In strict mode the reader will throw an exception on |
82 | * {@link #open(org.springframework.batch.item.ExecutionContext)} if the input resource does not exist. |
83 | * @param strict false by default |
84 | */ |
85 | public void setStrict(boolean strict) { |
86 | this.strict = strict; |
87 | } |
88 | |
89 | public void setResource(Resource resource) { |
90 | this.resource = resource; |
91 | } |
92 | |
93 | /** |
94 | * @param unmarshaller maps xml fragments corresponding to records to objects |
95 | */ |
96 | public void setUnmarshaller(Unmarshaller unmarshaller) { |
97 | this.unmarshaller = unmarshaller; |
98 | } |
99 | |
100 | /** |
101 | * @param fragmentRootElementName name of the root element of the fragment |
102 | */ |
103 | public void setFragmentRootElementName(String fragmentRootElementName) { |
104 | this.fragmentRootElementName = fragmentRootElementName; |
105 | } |
106 | |
107 | /** |
108 | * Ensure that all required dependencies for the ItemReader to run are provided after all properties have been set. |
109 | * |
110 | * @see org.springframework.beans.factory.InitializingBean#afterPropertiesSet() |
111 | * @throws IllegalArgumentException if the Resource, FragmentDeserializer or FragmentRootElementName is null, or if |
112 | * the root element is empty. |
113 | * @throws IllegalStateException if the Resource does not exist. |
114 | */ |
115 | public void afterPropertiesSet() throws Exception { |
116 | Assert.notNull(unmarshaller, "The Unmarshaller must not be null."); |
117 | Assert.hasLength(fragmentRootElementName, "The FragmentRootElementName must not be null"); |
118 | if (fragmentRootElementName.contains("{")) { |
119 | fragmentRootElementNameSpace = fragmentRootElementName.replaceAll("\\{(.*)\\}.*", "$1"); |
120 | fragmentRootElementName = fragmentRootElementName.replaceAll("\\{.*\\}(.*)", "$1"); |
121 | } |
122 | } |
123 | |
124 | /** |
125 | * Responsible for moving the cursor before the StartElement of the fragment root. |
126 | * |
127 | * This implementation simply looks for the next corresponding element, it does not care about element nesting. You |
128 | * will need to override this method to correctly handle composite fragments. |
129 | * |
130 | * @return <code>true</code> if next fragment was found, <code>false</code> otherwise. |
131 | * |
132 | * @throws NonTransientResourceException if the cursor could not be moved. This will be treated as fatal and |
133 | * subsequent calls to read will return null. |
134 | */ |
135 | protected boolean moveCursorToNextFragment(XMLEventReader reader) throws NonTransientResourceException { |
136 | try { |
137 | while (true) { |
138 | while (reader.peek() != null && !reader.peek().isStartElement()) { |
139 | reader.nextEvent(); |
140 | } |
141 | if (reader.peek() == null) { |
142 | return false; |
143 | } |
144 | QName startElementName = ((StartElement) reader.peek()).getName(); |
145 | if (startElementName.getLocalPart().equals(fragmentRootElementName)) { |
146 | if (fragmentRootElementNameSpace == null |
147 | || startElementName.getNamespaceURI().equals(fragmentRootElementNameSpace)) { |
148 | return true; |
149 | } |
150 | } |
151 | reader.nextEvent(); |
152 | |
153 | } |
154 | } |
155 | catch (XMLStreamException e) { |
156 | throw new NonTransientResourceException("Error while reading from event reader", e); |
157 | } |
158 | } |
159 | |
160 | protected void doClose() throws Exception { |
161 | try { |
162 | if (fragmentReader != null) { |
163 | fragmentReader.close(); |
164 | } |
165 | if (inputStream != null) { |
166 | inputStream.close(); |
167 | } |
168 | } |
169 | finally { |
170 | fragmentReader = null; |
171 | inputStream = null; |
172 | } |
173 | |
174 | } |
175 | |
176 | protected void doOpen() throws Exception { |
177 | Assert.notNull(resource, "The Resource must not be null."); |
178 | |
179 | noInput = true; |
180 | if (!resource.exists()) { |
181 | if (strict) { |
182 | throw new IllegalStateException("Input resource must exist (reader is in 'strict' mode)"); |
183 | } |
184 | logger.warn("Input resource does not exist " + resource.getDescription()); |
185 | return; |
186 | } |
187 | if (!resource.isReadable()) { |
188 | if (strict) { |
189 | throw new IllegalStateException("Input resource must be readable (reader is in 'strict' mode)"); |
190 | } |
191 | logger.warn("Input resource is not readable " + resource.getDescription()); |
192 | return; |
193 | } |
194 | |
195 | inputStream = resource.getInputStream(); |
196 | eventReader = XMLInputFactory.newInstance().createXMLEventReader(inputStream); |
197 | fragmentReader = new DefaultFragmentEventReader(eventReader); |
198 | noInput = false; |
199 | |
200 | } |
201 | |
202 | /** |
203 | * Move to next fragment and map it to item. |
204 | */ |
205 | protected T doRead() throws Exception { |
206 | |
207 | if (noInput) { |
208 | return null; |
209 | } |
210 | |
211 | T item = null; |
212 | |
213 | boolean success = false; |
214 | try { |
215 | success = moveCursorToNextFragment(fragmentReader); |
216 | } |
217 | catch (NonTransientResourceException e) { |
218 | // Prevent caller from retrying indefinitely since this is fatal |
219 | noInput = true; |
220 | throw e; |
221 | } |
222 | if (success) { |
223 | fragmentReader.markStartFragment(); |
224 | |
225 | try { |
226 | @SuppressWarnings("unchecked") |
227 | T mappedFragment = (T) unmarshaller.unmarshal(StaxUtils.getSource(fragmentReader)); |
228 | item = mappedFragment; |
229 | } |
230 | finally { |
231 | fragmentReader.markFragmentProcessed(); |
232 | } |
233 | } |
234 | |
235 | return item; |
236 | } |
237 | |
238 | /* |
239 | * jumpToItem is overridden because reading in and attempting to bind an entire fragment is unacceptable in a |
240 | * restart scenario, and may cause exceptions to be thrown that were already skipped in previous runs. |
241 | */ |
242 | @Override |
243 | protected void jumpToItem(int itemIndex) throws Exception { |
244 | for (int i = 0; i < itemIndex; i++) { |
245 | readToStartFragment(); |
246 | readToEndFragment(); |
247 | } |
248 | } |
249 | |
250 | /* |
251 | * Read until the first StartElement tag that matches the provided fragmentRootElementName. Because there may be any |
252 | * number of tags in between where the reader is now and the fragment start, this is done in a loop until the |
253 | * element type and name match. |
254 | */ |
255 | private void readToStartFragment() throws XMLStreamException { |
256 | while (true) { |
257 | XMLEvent nextEvent = eventReader.nextEvent(); |
258 | if (nextEvent.isStartElement() |
259 | && ((StartElement) nextEvent).getName().getLocalPart().equals(fragmentRootElementName)) { |
260 | return; |
261 | } |
262 | } |
263 | } |
264 | |
265 | /* |
266 | * Read until the first EndElement tag that matches the provided fragmentRootElementName. Because there may be any |
267 | * number of tags in between where the reader is now and the fragment end tag, this is done in a loop until the |
268 | * element type and name match |
269 | */ |
270 | private void readToEndFragment() throws XMLStreamException { |
271 | while (true) { |
272 | XMLEvent nextEvent = eventReader.nextEvent(); |
273 | if (nextEvent.isEndElement() |
274 | && ((EndElement) nextEvent).getName().getLocalPart().equals(fragmentRootElementName)) { |
275 | return; |
276 | } |
277 | } |
278 | } |
279 | } |