| 1 | /* |
| 2 | * Copyright 2006-2007 the original author or authors. |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | package org.springframework.batch.item.file.separator; |
| 18 | |
| 19 | import java.io.BufferedReader; |
| 20 | import java.io.IOException; |
| 21 | import java.io.InputStreamReader; |
| 22 | import java.util.Arrays; |
| 23 | import java.util.Collection; |
| 24 | import java.util.Collections; |
| 25 | import java.util.HashSet; |
| 26 | import java.util.Iterator; |
| 27 | |
| 28 | import org.springframework.batch.item.ItemReader; |
| 29 | import org.springframework.batch.item.ItemStream; |
| 30 | import org.springframework.batch.item.ItemStreamException; |
| 31 | import org.springframework.batch.item.MarkFailedException; |
| 32 | import org.springframework.batch.item.ResetFailedException; |
| 33 | import org.springframework.batch.item.UnexpectedInputException; |
| 34 | import org.springframework.core.io.Resource; |
| 35 | import org.springframework.util.Assert; |
| 36 | |
| 37 | /** |
| 38 | * An input source that reads lines one by one from a resource. <br/> |
| 39 | * |
| 40 | * A line can consist of multiple lines in the input resource, according to the {@link RecordSeparatorPolicy} in force. |
| 41 | * By default a line is either terminated by a newline (as per {@link BufferedReader#readLine()}), or can be continued |
| 42 | * onto the next line if a field surrounded by quotes (\") contains a newline.<br/> |
| 43 | * |
| 44 | * Comment lines can be indicated using a line prefix (or collection of prefixes) and they will be ignored. The default |
| 45 | * is "#", so lines starting with a pound sign will be ignored.<br/> |
| 46 | * |
| 47 | * All the public methods that interact with the underlying resource (open, close, read etc.) are synchronized on this.<br/> |
| 48 | * |
| 49 | * Package private because this is not intended to be a public API - used internally by the flat file input sources. |
| 50 | * That makes abuses of the fact that it is stateful easier to control.<br/> |
| 51 | * |
| 52 | * @author Dave Syer |
| 53 | * @author Rob Harrop |
| 54 | */ |
| 55 | public class ResourceLineReader implements LineReader, ItemReader { |
| 56 | |
| 57 | private static final Collection DEFAULT_COMMENTS = Collections.singleton("#"); |
| 58 | |
| 59 | private static final String DEFAULT_ENCODING = "ISO-8859-1"; |
| 60 | |
| 61 | private static final int READ_AHEAD_LIMIT = 100000; |
| 62 | |
| 63 | private final Resource resource; |
| 64 | |
| 65 | private final String encoding; |
| 66 | |
| 67 | private Collection comments = DEFAULT_COMMENTS; |
| 68 | |
| 69 | // Encapsulates the state of the reader. |
| 70 | private State state = null; |
| 71 | |
| 72 | private RecordSeparatorPolicy recordSeparatorPolicy = new DefaultRecordSeparatorPolicy(); |
| 73 | |
| 74 | public ResourceLineReader(Resource resource) throws IOException { |
| 75 | this(resource, DEFAULT_ENCODING); |
| 76 | } |
| 77 | |
| 78 | public ResourceLineReader(Resource resource, String encoding) { |
| 79 | Assert.notNull(resource, "'resource' cannot be null."); |
| 80 | Assert.notNull(encoding, "'encoding' cannot be null."); |
| 81 | this.resource = resource; |
| 82 | this.encoding = encoding; |
| 83 | } |
| 84 | |
| 85 | /** |
| 86 | * Setter for the {@link RecordSeparatorPolicy}. Default value is a {@link DefaultRecordSeparatorPolicy}. Ideally |
| 87 | * should not be changed once a reader is in use, but it would not be fatal if it was. |
| 88 | * |
| 89 | * @param recordSeparatorPolicy the new {@link RecordSeparatorPolicy} |
| 90 | */ |
| 91 | public void setRecordSeparatorPolicy(RecordSeparatorPolicy recordSeparatorPolicy) { |
| 92 | /* |
| 93 | * The rest of the code accesses the policy in synchronized blocks, copying the reference before using it. So in |
| 94 | * principle it can be changed in flight - the results might not be what the user expected! |
| 95 | */ |
| 96 | this.recordSeparatorPolicy = recordSeparatorPolicy; |
| 97 | } |
| 98 | |
| 99 | /** |
| 100 | * Setter for comment prefixes. Can be used to ignore header lines as well by using e.g. the first couple of column |
| 101 | * names as a prefix. |
| 102 | * |
| 103 | * @param comments an array of comment line prefixes. |
| 104 | */ |
| 105 | public void setComments(String[] comments) { |
| 106 | this.comments = new HashSet(Arrays.asList(comments)); |
| 107 | } |
| 108 | |
| 109 | /** |
| 110 | * Read the next line from the input resource, ignoring comments, and according to the {@link RecordSeparatorPolicy}. |
| 111 | * |
| 112 | * @return a String. |
| 113 | * |
| 114 | * @see org.springframework.batch.item.ItemReader#read() |
| 115 | */ |
| 116 | public synchronized Object read() { |
| 117 | // Make a copy of the recordSeparatorPolicy reference, in case it is |
| 118 | // changed during a read operation (unlikely, but you never know)... |
| 119 | RecordSeparatorPolicy recordSeparatorPolicy = this.recordSeparatorPolicy; |
| 120 | String line = readLine(); |
| 121 | String record = line; |
| 122 | if (line != null) { |
| 123 | while (line != null && !recordSeparatorPolicy.isEndOfRecord(record)) { |
| 124 | record = recordSeparatorPolicy.preProcess(record) + (line = readLine()); |
| 125 | } |
| 126 | } |
| 127 | return recordSeparatorPolicy.postProcess(record); |
| 128 | } |
| 129 | |
| 130 | /** |
| 131 | * @return the next non-comment line |
| 132 | */ |
| 133 | private String readLine() { |
| 134 | return getState().readLine(); |
| 135 | } |
| 136 | |
| 137 | /** |
| 138 | * @return |
| 139 | */ |
| 140 | private State getState() { |
| 141 | if (state == null) { |
| 142 | open(); |
| 143 | } |
| 144 | return state; |
| 145 | } |
| 146 | |
| 147 | /** |
| 148 | * Creates internal state object. |
| 149 | */ |
| 150 | public synchronized void open() { |
| 151 | state = new State(); |
| 152 | state.open(); |
| 153 | } |
| 154 | |
| 155 | /** |
| 156 | * Close the reader associated with this input source. |
| 157 | */ |
| 158 | public synchronized void close() { |
| 159 | if (state == null) { |
| 160 | return; |
| 161 | } |
| 162 | try { |
| 163 | state.close(); |
| 164 | } finally { |
| 165 | state = null; |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | /** |
| 170 | * Getter for current line count (not the current number of lines returned). |
| 171 | * |
| 172 | * @return the current line count. |
| 173 | */ |
| 174 | public int getPosition() { |
| 175 | return getState().getCurrentLineCount(); |
| 176 | } |
| 177 | |
| 178 | /** |
| 179 | * Mark the state for return later with reset. Uses the read-ahead limit from an underlying {@link BufferedReader}, |
| 180 | * which means that there is a limit to how much data can be recovered if the mark needs to be reset.<br/> |
| 181 | * |
| 182 | * Mark is supported as long as this {@link ItemStream} is used in a single-threaded environment. The state backing |
| 183 | * the mark is a single counter, keeping track of the current position, so multiple threads cannot be accommodated. |
| 184 | * |
| 185 | * @see #reset() |
| 186 | * |
| 187 | * @throws MarkFailedException if the mark could not be set. |
| 188 | */ |
| 189 | public synchronized void mark() throws MarkFailedException { |
| 190 | getState().mark(); |
| 191 | } |
| 192 | |
| 193 | /** |
| 194 | * Reset the reader to the last mark. |
| 195 | * |
| 196 | * @see #mark() |
| 197 | * |
| 198 | * @throws ResetFailedException if the reset is unsuccessful, e.g. if the read-ahead limit was breached. |
| 199 | */ |
| 200 | public synchronized void reset() throws ResetFailedException { |
| 201 | getState().reset(); |
| 202 | } |
| 203 | |
| 204 | private boolean isComment(String line) { |
| 205 | for (Iterator iter = comments.iterator(); iter.hasNext();) { |
| 206 | String prefix = (String) iter.next(); |
| 207 | if (line.startsWith(prefix)) { |
| 208 | return true; |
| 209 | } |
| 210 | } |
| 211 | return false; |
| 212 | } |
| 213 | |
| 214 | private class State { |
| 215 | private BufferedReader reader; |
| 216 | |
| 217 | private int currentLineCount = 0; |
| 218 | |
| 219 | private int markedLineCount = -1; |
| 220 | |
| 221 | public String readLine() { |
| 222 | String line = null; |
| 223 | |
| 224 | try { |
| 225 | line = this.reader.readLine(); |
| 226 | if (line == null) { |
| 227 | return null; |
| 228 | } |
| 229 | currentLineCount++; |
| 230 | while (isComment(line)) { |
| 231 | line = reader.readLine(); |
| 232 | if (line == null) { |
| 233 | return null; |
| 234 | } |
| 235 | currentLineCount++; |
| 236 | } |
| 237 | } catch (IOException e) { |
| 238 | throw new UnexpectedInputException("Unable to read from resource '" + resource + "' at line " |
| 239 | + currentLineCount, e); |
| 240 | } |
| 241 | return line; |
| 242 | } |
| 243 | |
| 244 | /** |
| 245 | * |
| 246 | */ |
| 247 | public void open() { |
| 248 | try { |
| 249 | reader = new BufferedReader(new InputStreamReader(resource.getInputStream(), encoding)); |
| 250 | mark(); |
| 251 | } catch (IOException e) { |
| 252 | throw new ItemStreamException("Could not open resource", e); |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | /** |
| 257 | * Close the reader and reset the counters. |
| 258 | */ |
| 259 | public void close() { |
| 260 | |
| 261 | if (reader == null) { |
| 262 | return; |
| 263 | } |
| 264 | try { |
| 265 | reader.close(); |
| 266 | } catch (IOException e) { |
| 267 | throw new ItemStreamException("Could not close reader", e); |
| 268 | } finally { |
| 269 | currentLineCount = 0; |
| 270 | markedLineCount = -1; |
| 271 | } |
| 272 | |
| 273 | } |
| 274 | |
| 275 | /** |
| 276 | * @return the current line count |
| 277 | */ |
| 278 | public int getCurrentLineCount() { |
| 279 | return currentLineCount; |
| 280 | } |
| 281 | |
| 282 | /** |
| 283 | * Mark the underlying reader and set the line counters. |
| 284 | */ |
| 285 | public void mark() throws MarkFailedException { |
| 286 | try { |
| 287 | reader.mark(READ_AHEAD_LIMIT); |
| 288 | markedLineCount = currentLineCount; |
| 289 | } catch (IOException e) { |
| 290 | throw new MarkFailedException("Could not mark reader", e); |
| 291 | } |
| 292 | } |
| 293 | |
| 294 | /** |
| 295 | * Reset the reader and line counters to the last marked position if possible. |
| 296 | */ |
| 297 | public void reset() throws ResetFailedException { |
| 298 | |
| 299 | if (markedLineCount < 0) { |
| 300 | return; |
| 301 | } |
| 302 | try { |
| 303 | this.reader.reset(); |
| 304 | currentLineCount = markedLineCount; |
| 305 | } catch (IOException e) { |
| 306 | throw new ResetFailedException("Could not reset reader", e); |
| 307 | } |
| 308 | |
| 309 | } |
| 310 | |
| 311 | } |
| 312 | |
| 313 | } |