1 | /* |
2 | * Copyright 2006-2007 the original author or authors. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | package org.springframework.batch.item.file.separator; |
18 | |
19 | import java.io.BufferedReader; |
20 | import java.io.IOException; |
21 | import java.io.InputStreamReader; |
22 | import java.util.Arrays; |
23 | import java.util.Collection; |
24 | import java.util.Collections; |
25 | import java.util.HashSet; |
26 | import java.util.Iterator; |
27 | |
28 | import org.springframework.batch.item.ExecutionContext; |
29 | import org.springframework.batch.item.ItemReader; |
30 | import org.springframework.batch.item.ItemStream; |
31 | import org.springframework.batch.item.ItemStreamException; |
32 | import org.springframework.batch.item.ItemStreamSupport; |
33 | import org.springframework.batch.item.MarkFailedException; |
34 | import org.springframework.batch.item.ResetFailedException; |
35 | import org.springframework.batch.item.UnexpectedInputException; |
36 | import org.springframework.core.io.Resource; |
37 | import org.springframework.util.Assert; |
38 | |
39 | /** |
40 | * An input source that reads lines one by one from a resource. <br/> |
41 | * |
42 | * A line can consist of multiple lines in the input resource, according to the {@link RecordSeparatorPolicy} in force. |
43 | * By default a line is either terminated by a newline (as per {@link BufferedReader#readLine()}), or can be continued |
44 | * onto the next line if a field surrounded by quotes (\") contains a newline.<br/> |
45 | * |
46 | * Comment lines can be indicated using a line prefix (or collection of prefixes) and they will be ignored. The default |
47 | * is "#", so lines starting with a pound sign will be ignored.<br/> |
48 | * |
49 | * All the public methods that interact with the underlying resource (open, close, read etc.) are synchronized on this.<br/> |
50 | * |
51 | * Package private because this is not intended to be a public API - used internally by the flat file input sources. |
52 | * That makes abuses of the fact that it is stateful easier to control.<br/> |
53 | * |
54 | * @author Dave Syer |
55 | * @author Rob Harrop |
56 | */ |
57 | public class ResourceLineReader extends ItemStreamSupport implements LineReader, ItemReader { |
58 | |
59 | private static final Collection DEFAULT_COMMENTS = Collections.singleton("#"); |
60 | |
61 | private static final String DEFAULT_ENCODING = "ISO-8859-1"; |
62 | |
63 | private static final int READ_AHEAD_LIMIT = 100000; |
64 | |
65 | private final Resource resource; |
66 | |
67 | private final String encoding; |
68 | |
69 | private Collection comments = DEFAULT_COMMENTS; |
70 | |
71 | // Encapsulates the state of the reader. |
72 | private State state = null; |
73 | |
74 | private RecordSeparatorPolicy recordSeparatorPolicy = new DefaultRecordSeparatorPolicy(); |
75 | |
76 | public ResourceLineReader(Resource resource) throws IOException { |
77 | this(resource, DEFAULT_ENCODING); |
78 | } |
79 | |
80 | public ResourceLineReader(Resource resource, String encoding) { |
81 | Assert.notNull(resource, "'resource' cannot be null."); |
82 | Assert.notNull(encoding, "'encoding' cannot be null."); |
83 | this.resource = resource; |
84 | this.encoding = encoding; |
85 | } |
86 | |
87 | /** |
88 | * Setter for the {@link RecordSeparatorPolicy}. Default value is a {@link DefaultRecordSeparatorPolicy}. Ideally |
89 | * should not be changed once a reader is in use, but it would not be fatal if it was. |
90 | * |
91 | * @param recordSeparatorPolicy the new {@link RecordSeparatorPolicy} |
92 | */ |
93 | public void setRecordSeparatorPolicy(RecordSeparatorPolicy recordSeparatorPolicy) { |
94 | /* |
95 | * The rest of the code accesses the policy in synchronized blocks, copying the reference before using it. So in |
96 | * principle it can be changed in flight - the results might not be what the user expected! |
97 | */ |
98 | this.recordSeparatorPolicy = recordSeparatorPolicy; |
99 | } |
100 | |
101 | /** |
102 | * Setter for comment prefixes. Can be used to ignore header lines as well by using e.g. the first couple of column |
103 | * names as a prefix. |
104 | * |
105 | * @param comments an array of comment line prefixes. |
106 | */ |
107 | public void setComments(String[] comments) { |
108 | this.comments = new HashSet(Arrays.asList(comments)); |
109 | } |
110 | |
111 | /** |
112 | * Read the next line from the input resource, ignoring comments, and according to the {@link RecordSeparatorPolicy}. |
113 | * |
114 | * @return a String. |
115 | * |
116 | * @see org.springframework.batch.item.ItemReader#read() |
117 | */ |
118 | public synchronized Object read() { |
119 | // Make a copy of the recordSeparatorPolicy reference, in case it is |
120 | // changed during a read operation (unlikely, but you never know)... |
121 | RecordSeparatorPolicy recordSeparatorPolicy = this.recordSeparatorPolicy; |
122 | String line = readLine(); |
123 | String record = line; |
124 | if (line != null) { |
125 | while (line != null && !recordSeparatorPolicy.isEndOfRecord(record)) { |
126 | record = recordSeparatorPolicy.preProcess(record) + (line = readLine()); |
127 | } |
128 | } |
129 | return recordSeparatorPolicy.postProcess(record); |
130 | } |
131 | |
132 | /** |
133 | * @return the next non-comment line |
134 | */ |
135 | private String readLine() { |
136 | return getState().readLine(); |
137 | } |
138 | |
139 | /** |
140 | * @return |
141 | */ |
142 | private State getState() { |
143 | if (state == null) { |
144 | open(); |
145 | } |
146 | return state; |
147 | } |
148 | |
149 | /** |
150 | * Creates internal state object. |
151 | */ |
152 | public synchronized void open() { |
153 | state = new State(); |
154 | state.open(); |
155 | } |
156 | |
157 | /** |
158 | * Close the reader associated with this input source. |
159 | * |
160 | * @see org.springframework.batch.item.ItemStreamSupport#close(org.springframework.batch.item.ExecutionContext) |
161 | */ |
162 | public synchronized void close(ExecutionContext executionContext) { |
163 | if (state == null) { |
164 | return; |
165 | } |
166 | try { |
167 | state.close(); |
168 | } finally { |
169 | state = null; |
170 | } |
171 | } |
172 | |
173 | /** |
174 | * Getter for current line count (not the current number of lines returned). |
175 | * |
176 | * @return the current line count. |
177 | */ |
178 | public int getPosition() { |
179 | return getState().getCurrentLineCount(); |
180 | } |
181 | |
182 | /** |
183 | * Mark the state for return later with reset. Uses the read-ahead limit from an underlying {@link BufferedReader}, |
184 | * which means that there is a limit to how much data can be recovered if the mark needs to be reset.<br/> |
185 | * |
186 | * Mark is supported as long as this {@link ItemStream} is used in a single-threaded environment. The state backing |
187 | * the mark is a single counter, keeping track of the current position, so multiple threads cannot be accommodated. |
188 | * |
189 | * @see #reset() |
190 | * |
191 | * @throws MarkFailedException if the mark could not be set. |
192 | */ |
193 | public synchronized void mark() throws MarkFailedException { |
194 | getState().mark(); |
195 | } |
196 | |
197 | /** |
198 | * Reset the reader to the last mark. |
199 | * |
200 | * @see #mark() |
201 | * |
202 | * @throws ResetFailedException if the reset is unsuccessful, e.g. if the read-ahead limit was breached. |
203 | */ |
204 | public synchronized void reset() throws ResetFailedException { |
205 | getState().reset(); |
206 | } |
207 | |
208 | private boolean isComment(String line) { |
209 | for (Iterator iter = comments.iterator(); iter.hasNext();) { |
210 | String prefix = (String) iter.next(); |
211 | if (line.startsWith(prefix)) { |
212 | return true; |
213 | } |
214 | } |
215 | return false; |
216 | } |
217 | |
218 | private class State { |
219 | private BufferedReader reader; |
220 | |
221 | private int currentLineCount = 0; |
222 | |
223 | private int markedLineCount = -1; |
224 | |
225 | public String readLine() { |
226 | String line = null; |
227 | |
228 | try { |
229 | line = this.reader.readLine(); |
230 | if (line == null) { |
231 | return null; |
232 | } |
233 | currentLineCount++; |
234 | while (isComment(line)) { |
235 | line = reader.readLine(); |
236 | if (line == null) { |
237 | return null; |
238 | } |
239 | currentLineCount++; |
240 | } |
241 | } catch (IOException e) { |
242 | throw new UnexpectedInputException("Unable to read from resource '" + resource + "' at line " |
243 | + currentLineCount, e); |
244 | } |
245 | return line; |
246 | } |
247 | |
248 | /** |
249 | * |
250 | */ |
251 | public void open() { |
252 | try { |
253 | reader = new BufferedReader(new InputStreamReader(resource.getInputStream(), encoding)); |
254 | mark(); |
255 | } catch (IOException e) { |
256 | throw new ItemStreamException("Could not open resource", e); |
257 | } |
258 | } |
259 | |
260 | /** |
261 | * Close the reader and reset the counters. |
262 | */ |
263 | public void close() { |
264 | |
265 | if (reader == null) { |
266 | return; |
267 | } |
268 | try { |
269 | reader.close(); |
270 | } catch (IOException e) { |
271 | throw new ItemStreamException("Could not close reader", e); |
272 | } finally { |
273 | currentLineCount = 0; |
274 | markedLineCount = -1; |
275 | } |
276 | |
277 | } |
278 | |
279 | /** |
280 | * @return the current line count |
281 | */ |
282 | public int getCurrentLineCount() { |
283 | return currentLineCount; |
284 | } |
285 | |
286 | /** |
287 | * Mark the underlying reader and set the line counters. |
288 | */ |
289 | public void mark() throws MarkFailedException { |
290 | try { |
291 | reader.mark(READ_AHEAD_LIMIT); |
292 | markedLineCount = currentLineCount; |
293 | } catch (IOException e) { |
294 | throw new MarkFailedException("Could not mark reader", e); |
295 | } |
296 | } |
297 | |
298 | /** |
299 | * Reset the reader and line counters to the last marked position if possible. |
300 | */ |
301 | public void reset() throws ResetFailedException { |
302 | |
303 | if (markedLineCount < 0) { |
304 | return; |
305 | } |
306 | try { |
307 | this.reader.reset(); |
308 | currentLineCount = markedLineCount; |
309 | } catch (IOException e) { |
310 | throw new ResetFailedException("Could not reset reader", e); |
311 | } |
312 | |
313 | } |
314 | |
315 | } |
316 | |
317 | } |