1 | /* |
2 | * Copyright 2006-2007 the original author or authors. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | package org.springframework.batch.item.file.separator; |
18 | |
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;

import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemStream;
import org.springframework.batch.item.ItemStreamException;
import org.springframework.batch.item.MarkFailedException;
import org.springframework.batch.item.ResetFailedException;
import org.springframework.batch.item.UnexpectedInputException;
import org.springframework.core.io.Resource;
import org.springframework.util.Assert;
36 | |
37 | /** |
38 | * An input source that reads lines one by one from a resource. <br/> |
39 | * |
40 | * A line can consist of multiple lines in the input resource, according to the {@link RecordSeparatorPolicy} in force. |
41 | * By default a line is either terminated by a newline (as per {@link BufferedReader#readLine()}), or can be continued |
42 | * onto the next line if a field surrounded by quotes (\") contains a newline.<br/> |
43 | * |
44 | * Comment lines can be indicated using a line prefix (or collection of prefixes) and they will be ignored. The default |
45 | * is "#", so lines starting with a pound sign will be ignored.<br/> |
46 | * |
47 | * All the public methods that interact with the underlying resource (open, close, read etc.) are synchronized on this.<br/> |
48 | * |
49 | * Package private because this is not intended to be a public API - used internally by the flat file input sources. |
50 | * That makes abuses of the fact that it is stateful easier to control.<br/> |
51 | * |
52 | * @author Dave Syer |
53 | * @author Rob Harrop |
54 | */ |
55 | public class ResourceLineReader implements LineReader, ItemReader { |
56 | |
57 | private static final Collection DEFAULT_COMMENTS = Collections.singleton("#"); |
58 | |
59 | private static final String DEFAULT_ENCODING = "ISO-8859-1"; |
60 | |
61 | private static final int READ_AHEAD_LIMIT = 100000; |
62 | |
63 | private final Resource resource; |
64 | |
65 | private final String encoding; |
66 | |
67 | private Collection comments = DEFAULT_COMMENTS; |
68 | |
69 | // Encapsulates the state of the reader. |
70 | private State state = null; |
71 | |
72 | private RecordSeparatorPolicy recordSeparatorPolicy = new DefaultRecordSeparatorPolicy(); |
73 | |
74 | public ResourceLineReader(Resource resource) throws IOException { |
75 | this(resource, DEFAULT_ENCODING); |
76 | } |
77 | |
78 | public ResourceLineReader(Resource resource, String encoding) { |
79 | Assert.notNull(resource, "'resource' cannot be null."); |
80 | Assert.notNull(encoding, "'encoding' cannot be null."); |
81 | this.resource = resource; |
82 | this.encoding = encoding; |
83 | } |
84 | |
85 | /** |
86 | * Setter for the {@link RecordSeparatorPolicy}. Default value is a {@link DefaultRecordSeparatorPolicy}. Ideally |
87 | * should not be changed once a reader is in use, but it would not be fatal if it was. |
88 | * |
89 | * @param recordSeparatorPolicy the new {@link RecordSeparatorPolicy} |
90 | */ |
91 | public void setRecordSeparatorPolicy(RecordSeparatorPolicy recordSeparatorPolicy) { |
92 | /* |
93 | * The rest of the code accesses the policy in synchronized blocks, copying the reference before using it. So in |
94 | * principle it can be changed in flight - the results might not be what the user expected! |
95 | */ |
96 | this.recordSeparatorPolicy = recordSeparatorPolicy; |
97 | } |
98 | |
99 | /** |
100 | * Setter for comment prefixes. Can be used to ignore header lines as well by using e.g. the first couple of column |
101 | * names as a prefix. |
102 | * |
103 | * @param comments an array of comment line prefixes. |
104 | */ |
105 | public void setComments(String[] comments) { |
106 | this.comments = new HashSet(Arrays.asList(comments)); |
107 | } |
108 | |
109 | /** |
110 | * Read the next line from the input resource, ignoring comments, and according to the {@link RecordSeparatorPolicy}. |
111 | * |
112 | * @return a String. |
113 | * |
114 | * @see org.springframework.batch.item.ItemReader#read() |
115 | */ |
116 | public synchronized Object read() { |
117 | // Make a copy of the recordSeparatorPolicy reference, in case it is |
118 | // changed during a read operation (unlikely, but you never know)... |
119 | RecordSeparatorPolicy recordSeparatorPolicy = this.recordSeparatorPolicy; |
120 | String line = readLine(); |
121 | String record = line; |
122 | if (line != null) { |
123 | while (line != null && !recordSeparatorPolicy.isEndOfRecord(record)) { |
124 | record = recordSeparatorPolicy.preProcess(record) + (line = readLine()); |
125 | } |
126 | } |
127 | return recordSeparatorPolicy.postProcess(record); |
128 | } |
129 | |
130 | /** |
131 | * @return the next non-comment line |
132 | */ |
133 | private String readLine() { |
134 | return getState().readLine(); |
135 | } |
136 | |
137 | /** |
138 | * @return |
139 | */ |
140 | private State getState() { |
141 | if (state == null) { |
142 | open(); |
143 | } |
144 | return state; |
145 | } |
146 | |
147 | /** |
148 | * Creates internal state object. |
149 | */ |
150 | public synchronized void open() { |
151 | state = new State(); |
152 | state.open(); |
153 | } |
154 | |
155 | /** |
156 | * Close the reader associated with this input source. |
157 | */ |
158 | public synchronized void close() { |
159 | if (state == null) { |
160 | return; |
161 | } |
162 | try { |
163 | state.close(); |
164 | } finally { |
165 | state = null; |
166 | } |
167 | } |
168 | |
169 | /** |
170 | * Getter for current line count (not the current number of lines returned). |
171 | * |
172 | * @return the current line count. |
173 | */ |
174 | public int getPosition() { |
175 | return getState().getCurrentLineCount(); |
176 | } |
177 | |
178 | /** |
179 | * Mark the state for return later with reset. Uses the read-ahead limit from an underlying {@link BufferedReader}, |
180 | * which means that there is a limit to how much data can be recovered if the mark needs to be reset.<br/> |
181 | * |
182 | * Mark is supported as long as this {@link ItemStream} is used in a single-threaded environment. The state backing |
183 | * the mark is a single counter, keeping track of the current position, so multiple threads cannot be accommodated. |
184 | * |
185 | * @see #reset() |
186 | * |
187 | * @throws MarkFailedException if the mark could not be set. |
188 | */ |
189 | public synchronized void mark() throws MarkFailedException { |
190 | getState().mark(); |
191 | } |
192 | |
193 | /** |
194 | * Reset the reader to the last mark. |
195 | * |
196 | * @see #mark() |
197 | * |
198 | * @throws ResetFailedException if the reset is unsuccessful, e.g. if the read-ahead limit was breached. |
199 | */ |
200 | public synchronized void reset() throws ResetFailedException { |
201 | getState().reset(); |
202 | } |
203 | |
204 | private boolean isComment(String line) { |
205 | for (Iterator iter = comments.iterator(); iter.hasNext();) { |
206 | String prefix = (String) iter.next(); |
207 | if (line.startsWith(prefix)) { |
208 | return true; |
209 | } |
210 | } |
211 | return false; |
212 | } |
213 | |
214 | private class State { |
215 | private BufferedReader reader; |
216 | |
217 | private int currentLineCount = 0; |
218 | |
219 | private int markedLineCount = -1; |
220 | |
221 | public String readLine() { |
222 | String line = null; |
223 | |
224 | try { |
225 | line = this.reader.readLine(); |
226 | if (line == null) { |
227 | return null; |
228 | } |
229 | currentLineCount++; |
230 | while (isComment(line)) { |
231 | line = reader.readLine(); |
232 | if (line == null) { |
233 | return null; |
234 | } |
235 | currentLineCount++; |
236 | } |
237 | } catch (IOException e) { |
238 | throw new UnexpectedInputException("Unable to read from resource '" + resource + "' at line " |
239 | + currentLineCount, e); |
240 | } |
241 | return line; |
242 | } |
243 | |
244 | /** |
245 | * |
246 | */ |
247 | public void open() { |
248 | try { |
249 | reader = new BufferedReader(new InputStreamReader(resource.getInputStream(), encoding)); |
250 | mark(); |
251 | } catch (IOException e) { |
252 | throw new ItemStreamException("Could not open resource", e); |
253 | } |
254 | } |
255 | |
256 | /** |
257 | * Close the reader and reset the counters. |
258 | */ |
259 | public void close() { |
260 | |
261 | if (reader == null) { |
262 | return; |
263 | } |
264 | try { |
265 | reader.close(); |
266 | } catch (IOException e) { |
267 | throw new ItemStreamException("Could not close reader", e); |
268 | } finally { |
269 | currentLineCount = 0; |
270 | markedLineCount = -1; |
271 | } |
272 | |
273 | } |
274 | |
275 | /** |
276 | * @return the current line count |
277 | */ |
278 | public int getCurrentLineCount() { |
279 | return currentLineCount; |
280 | } |
281 | |
282 | /** |
283 | * Mark the underlying reader and set the line counters. |
284 | */ |
285 | public void mark() throws MarkFailedException { |
286 | try { |
287 | reader.mark(READ_AHEAD_LIMIT); |
288 | markedLineCount = currentLineCount; |
289 | } catch (IOException e) { |
290 | throw new MarkFailedException("Could not mark reader", e); |
291 | } |
292 | } |
293 | |
294 | /** |
295 | * Reset the reader and line counters to the last marked position if possible. |
296 | */ |
297 | public void reset() throws ResetFailedException { |
298 | |
299 | if (markedLineCount < 0) { |
300 | return; |
301 | } |
302 | try { |
303 | this.reader.reset(); |
304 | currentLineCount = markedLineCount; |
305 | } catch (IOException e) { |
306 | throw new ResetFailedException("Could not reset reader", e); |
307 | } |
308 | |
309 | } |
310 | |
311 | } |
312 | |
313 | } |