| 1 | /* |
| 2 | * Copyright 2006-2007 the original author or authors. |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | package org.springframework.batch.item.xml; |
| 18 | |
| 19 | import java.io.InputStream; |
| 20 | import java.util.NoSuchElementException; |
| 21 | |
| 22 | import javax.xml.namespace.QName; |
| 23 | import javax.xml.stream.XMLEventReader; |
| 24 | import javax.xml.stream.XMLInputFactory; |
| 25 | import javax.xml.stream.XMLStreamException; |
| 26 | import javax.xml.stream.events.EndElement; |
| 27 | import javax.xml.stream.events.StartElement; |
| 28 | import javax.xml.stream.events.XMLEvent; |
| 29 | |
| 30 | import org.apache.commons.logging.Log; |
| 31 | import org.apache.commons.logging.LogFactory; |
| 32 | import org.springframework.batch.item.NonTransientResourceException; |
| 33 | import org.springframework.batch.item.file.ResourceAwareItemReaderItemStream; |
| 34 | import org.springframework.batch.item.support.AbstractItemCountingItemStreamItemReader; |
| 35 | import org.springframework.batch.item.xml.stax.DefaultFragmentEventReader; |
| 36 | import org.springframework.batch.item.xml.stax.FragmentEventReader; |
| 37 | import org.springframework.beans.factory.InitializingBean; |
| 38 | import org.springframework.core.io.Resource; |
| 39 | import org.springframework.oxm.Unmarshaller; |
| 40 | import org.springframework.util.Assert; |
| 41 | import org.springframework.util.ClassUtils; |
| 42 | |
| 43 | /** |
| 44 | * Item reader for reading XML input based on StAX. |
| 45 | * |
| 46 | * It extracts fragments from the input XML document which correspond to records for processing. The fragments are |
| 47 | * wrapped with StartDocument and EndDocument events so that the fragments can be further processed like standalone XML |
| 48 | * documents. |
| 49 | * |
| 50 | * The implementation is *not* thread-safe. |
| 51 | * |
| 52 | * @author Robert Kasanicky |
| 53 | */ |
| 54 | public class StaxEventItemReader<T> extends AbstractItemCountingItemStreamItemReader<T> implements |
| 55 | ResourceAwareItemReaderItemStream<T>, InitializingBean { |
| 56 | |
| 57 | private static final Log logger = LogFactory.getLog(StaxEventItemReader.class); |
| 58 | |
| 59 | private FragmentEventReader fragmentReader; |
| 60 | |
| 61 | private XMLEventReader eventReader; |
| 62 | |
| 63 | private Unmarshaller unmarshaller; |
| 64 | |
| 65 | private Resource resource; |
| 66 | |
| 67 | private InputStream inputStream; |
| 68 | |
| 69 | private String fragmentRootElementName; |
| 70 | |
| 71 | private boolean noInput; |
| 72 | |
| 73 | private boolean strict = true; |
| 74 | |
| 75 | private String fragmentRootElementNameSpace; |
| 76 | |
| 77 | public StaxEventItemReader() { |
| 78 | setName(ClassUtils.getShortName(StaxEventItemReader.class)); |
| 79 | } |
| 80 | |
| 81 | /** |
| 82 | * In strict mode the reader will throw an exception on |
| 83 | * {@link #open(org.springframework.batch.item.ExecutionContext)} if the input resource does not exist. |
| 84 | * @param strict false by default |
| 85 | */ |
| 86 | public void setStrict(boolean strict) { |
| 87 | this.strict = strict; |
| 88 | } |
| 89 | |
| 90 | @Override |
| 91 | public void setResource(Resource resource) { |
| 92 | this.resource = resource; |
| 93 | } |
| 94 | |
| 95 | /** |
| 96 | * @param unmarshaller maps xml fragments corresponding to records to objects |
| 97 | */ |
| 98 | public void setUnmarshaller(Unmarshaller unmarshaller) { |
| 99 | this.unmarshaller = unmarshaller; |
| 100 | } |
| 101 | |
| 102 | /** |
| 103 | * @param fragmentRootElementName name of the root element of the fragment |
| 104 | */ |
| 105 | public void setFragmentRootElementName(String fragmentRootElementName) { |
| 106 | this.fragmentRootElementName = fragmentRootElementName; |
| 107 | } |
| 108 | |
| 109 | /** |
| 110 | * Ensure that all required dependencies for the ItemReader to run are provided after all properties have been set. |
| 111 | * |
| 112 | * @see org.springframework.beans.factory.InitializingBean#afterPropertiesSet() |
| 113 | * @throws IllegalArgumentException if the Resource, FragmentDeserializer or FragmentRootElementName is null, or if |
| 114 | * the root element is empty. |
| 115 | * @throws IllegalStateException if the Resource does not exist. |
| 116 | */ |
| 117 | @Override |
| 118 | public void afterPropertiesSet() throws Exception { |
| 119 | Assert.notNull(unmarshaller, "The Unmarshaller must not be null."); |
| 120 | Assert.hasLength(fragmentRootElementName, "The FragmentRootElementName must not be null"); |
| 121 | if (fragmentRootElementName.contains("{")) { |
| 122 | fragmentRootElementNameSpace = fragmentRootElementName.replaceAll("\\{(.*)\\}.*", "$1"); |
| 123 | fragmentRootElementName = fragmentRootElementName.replaceAll("\\{.*\\}(.*)", "$1"); |
| 124 | } |
| 125 | } |
| 126 | |
| 127 | /** |
| 128 | * Responsible for moving the cursor before the StartElement of the fragment root. |
| 129 | * |
| 130 | * This implementation simply looks for the next corresponding element, it does not care about element nesting. You |
| 131 | * will need to override this method to correctly handle composite fragments. |
| 132 | * |
| 133 | * @return <code>true</code> if next fragment was found, <code>false</code> otherwise. |
| 134 | * |
| 135 | * @throws NonTransientResourceException if the cursor could not be moved. This will be treated as fatal and |
| 136 | * subsequent calls to read will return null. |
| 137 | */ |
| 138 | protected boolean moveCursorToNextFragment(XMLEventReader reader) throws NonTransientResourceException { |
| 139 | try { |
| 140 | while (true) { |
| 141 | while (reader.peek() != null && !reader.peek().isStartElement()) { |
| 142 | reader.nextEvent(); |
| 143 | } |
| 144 | if (reader.peek() == null) { |
| 145 | return false; |
| 146 | } |
| 147 | QName startElementName = ((StartElement) reader.peek()).getName(); |
| 148 | if (startElementName.getLocalPart().equals(fragmentRootElementName)) { |
| 149 | if (fragmentRootElementNameSpace == null |
| 150 | || startElementName.getNamespaceURI().equals(fragmentRootElementNameSpace)) { |
| 151 | return true; |
| 152 | } |
| 153 | } |
| 154 | reader.nextEvent(); |
| 155 | |
| 156 | } |
| 157 | } |
| 158 | catch (XMLStreamException e) { |
| 159 | throw new NonTransientResourceException("Error while reading from event reader", e); |
| 160 | } |
| 161 | } |
| 162 | |
| 163 | @Override |
| 164 | protected void doClose() throws Exception { |
| 165 | try { |
| 166 | if (fragmentReader != null) { |
| 167 | fragmentReader.close(); |
| 168 | } |
| 169 | if (inputStream != null) { |
| 170 | inputStream.close(); |
| 171 | } |
| 172 | } |
| 173 | finally { |
| 174 | fragmentReader = null; |
| 175 | inputStream = null; |
| 176 | } |
| 177 | |
| 178 | } |
| 179 | |
| 180 | @Override |
| 181 | protected void doOpen() throws Exception { |
| 182 | Assert.notNull(resource, "The Resource must not be null."); |
| 183 | |
| 184 | noInput = true; |
| 185 | if (!resource.exists()) { |
| 186 | if (strict) { |
| 187 | throw new IllegalStateException("Input resource must exist (reader is in 'strict' mode)"); |
| 188 | } |
| 189 | logger.warn("Input resource does not exist " + resource.getDescription()); |
| 190 | return; |
| 191 | } |
| 192 | if (!resource.isReadable()) { |
| 193 | if (strict) { |
| 194 | throw new IllegalStateException("Input resource must be readable (reader is in 'strict' mode)"); |
| 195 | } |
| 196 | logger.warn("Input resource is not readable " + resource.getDescription()); |
| 197 | return; |
| 198 | } |
| 199 | |
| 200 | inputStream = resource.getInputStream(); |
| 201 | eventReader = XMLInputFactory.newInstance().createXMLEventReader(inputStream); |
| 202 | fragmentReader = new DefaultFragmentEventReader(eventReader); |
| 203 | noInput = false; |
| 204 | |
| 205 | } |
| 206 | |
| 207 | /** |
| 208 | * Move to next fragment and map it to item. |
| 209 | */ |
| 210 | @Override |
| 211 | protected T doRead() throws Exception { |
| 212 | |
| 213 | if (noInput) { |
| 214 | return null; |
| 215 | } |
| 216 | |
| 217 | T item = null; |
| 218 | |
| 219 | boolean success = false; |
| 220 | try { |
| 221 | success = moveCursorToNextFragment(fragmentReader); |
| 222 | } |
| 223 | catch (NonTransientResourceException e) { |
| 224 | // Prevent caller from retrying indefinitely since this is fatal |
| 225 | noInput = true; |
| 226 | throw e; |
| 227 | } |
| 228 | if (success) { |
| 229 | fragmentReader.markStartFragment(); |
| 230 | |
| 231 | try { |
| 232 | @SuppressWarnings("unchecked") |
| 233 | T mappedFragment = (T) unmarshaller.unmarshal(StaxUtils.getSource(fragmentReader)); |
| 234 | item = mappedFragment; |
| 235 | } |
| 236 | finally { |
| 237 | fragmentReader.markFragmentProcessed(); |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | return item; |
| 242 | } |
| 243 | |
| 244 | /* |
| 245 | * jumpToItem is overridden because reading in and attempting to bind an entire fragment is unacceptable in a |
| 246 | * restart scenario, and may cause exceptions to be thrown that were already skipped in previous runs. |
| 247 | */ |
| 248 | @Override |
| 249 | protected void jumpToItem(int itemIndex) throws Exception { |
| 250 | for (int i = 0; i < itemIndex; i++) { |
| 251 | try { |
| 252 | readToStartFragment(); |
| 253 | readToEndFragment(); |
| 254 | } catch (NoSuchElementException e) { |
| 255 | if (itemIndex == (i + 1)) { |
| 256 | // we can presume a NoSuchElementException on the last item means the EOF was reached on the last run |
| 257 | return; |
| 258 | } else { |
| 259 | // if NoSuchElementException occurs on an item other than the last one, this indicates a problem |
| 260 | throw e; |
| 261 | } |
| 262 | } |
| 263 | } |
| 264 | } |
| 265 | |
| 266 | /* |
| 267 | * Read until the first StartElement tag that matches the provided fragmentRootElementName. Because there may be any |
| 268 | * number of tags in between where the reader is now and the fragment start, this is done in a loop until the |
| 269 | * element type and name match. |
| 270 | */ |
| 271 | private void readToStartFragment() throws XMLStreamException { |
| 272 | while (true) { |
| 273 | XMLEvent nextEvent = eventReader.nextEvent(); |
| 274 | if (nextEvent.isStartElement() |
| 275 | && ((StartElement) nextEvent).getName().getLocalPart().equals(fragmentRootElementName)) { |
| 276 | return; |
| 277 | } |
| 278 | } |
| 279 | } |
| 280 | |
| 281 | /* |
| 282 | * Read until the first EndElement tag that matches the provided fragmentRootElementName. Because there may be any |
| 283 | * number of tags in between where the reader is now and the fragment end tag, this is done in a loop until the |
| 284 | * element type and name match |
| 285 | */ |
| 286 | private void readToEndFragment() throws XMLStreamException { |
| 287 | while (true) { |
| 288 | XMLEvent nextEvent = eventReader.nextEvent(); |
| 289 | if (nextEvent.isEndElement() |
| 290 | && ((EndElement) nextEvent).getName().getLocalPart().equals(fragmentRootElementName)) { |
| 291 | return; |
| 292 | } |
| 293 | } |
| 294 | } |
| 295 | } |