1 | /* |
2 | * Copyright 2006-2007 the original author or authors. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | package org.springframework.batch.item.file; |
18 | |
19 | import org.apache.commons.logging.Log; |
20 | import org.apache.commons.logging.LogFactory; |
21 | import org.springframework.batch.item.ItemReader; |
22 | import org.springframework.batch.item.ItemReaderException; |
23 | import org.springframework.batch.item.ItemStreamException; |
24 | import org.springframework.batch.item.ReaderNotOpenException; |
25 | import org.springframework.batch.item.file.mapping.FieldSet; |
26 | import org.springframework.batch.item.file.mapping.FieldSetMapper; |
27 | import org.springframework.batch.item.file.separator.LineReader; |
28 | import org.springframework.batch.item.file.separator.RecordSeparatorPolicy; |
29 | import org.springframework.batch.item.file.separator.ResourceLineReader; |
30 | import org.springframework.batch.item.file.transform.AbstractLineTokenizer; |
31 | import org.springframework.batch.item.file.transform.DelimitedLineTokenizer; |
32 | import org.springframework.batch.item.file.transform.LineTokenizer; |
33 | import org.springframework.batch.item.support.AbstractBufferedItemReaderItemStream; |
34 | import org.springframework.beans.factory.InitializingBean; |
35 | import org.springframework.core.io.Resource; |
36 | import org.springframework.util.Assert; |
37 | import org.springframework.util.ClassUtils; |
38 | |
39 | /** |
40 | * This class represents a {@link ItemReader}, that reads lines from text file, |
41 | * tokenizes them to structured tuples ({@link FieldSet}s) instances and maps |
42 | * the {@link FieldSet}s to domain objects. The location of the file is defined |
43 | * by the resource property. To separate the structure of the file, |
44 | * {@link LineTokenizer} is used to parse data obtained from the file. <br/> |
45 | * |
46 | * A {@link FlatFileItemReader} is not thread safe because it maintains state in |
47 | * the form of a {@link ResourceLineReader}. Be careful to configure a |
48 | * {@link FlatFileItemReader} using an appropriate factory or scope so that it |
49 | * is not shared between threads.<br/> |
50 | * |
51 | * <p> |
52 | * This class supports restart, skipping invalid lines and storing statistics. |
53 | * It can be configured to setup {@link FieldSet} column names from the file |
54 | * header, skip given number of lines at the beginning of the file. |
55 | * </p> |
56 | * |
57 | * The implementation is *not* thread-safe. |
58 | * |
59 | * @author Waseem Malik |
60 | * @author Tomas Slanina |
61 | * @author Robert Kasanicky |
62 | * @author Dave Syer |
63 | */ |
64 | public class FlatFileItemReader extends AbstractBufferedItemReaderItemStream implements |
65 | ResourceAwareItemReaderItemStream, InitializingBean { |
66 | |
67 | private static Log log = LogFactory.getLog(FlatFileItemReader.class); |
68 | |
69 | // default encoding for input files |
70 | public static final String DEFAULT_CHARSET = "ISO-8859-1"; |
71 | |
72 | private String encoding = DEFAULT_CHARSET; |
73 | |
74 | private Resource resource; |
75 | |
76 | private RecordSeparatorPolicy recordSeparatorPolicy; |
77 | |
78 | private String[] comments; |
79 | |
80 | private int linesToSkip = 0; |
81 | |
82 | private boolean firstLineIsHeader = false; |
83 | |
84 | private LineTokenizer tokenizer = new DelimitedLineTokenizer(); |
85 | |
86 | private FieldSetMapper fieldSetMapper; |
87 | |
88 | /** |
89 | * Encapsulates the state of the input source. If it is null then we are |
90 | * uninitialized. |
91 | */ |
92 | private LineReader reader; |
93 | |
94 | private boolean noInput = false; |
95 | |
96 | public FlatFileItemReader() { |
97 | setName(ClassUtils.getShortName(FlatFileItemReader.class)); |
98 | } |
99 | |
100 | /** |
101 | * @return next line to be tokenized and mapped. |
102 | */ |
103 | private String readLine() { |
104 | try { |
105 | return (String) getReader().read(); |
106 | } |
107 | catch (ItemStreamException e) { |
108 | throw e; |
109 | } |
110 | catch (ItemReaderException e) { |
111 | throw e; |
112 | } |
113 | catch (Exception e) { |
114 | throw new IllegalStateException(); |
115 | } |
116 | } |
117 | |
118 | /** |
119 | * @return line reader used to read input file |
120 | */ |
121 | protected LineReader getReader() { |
122 | if (reader == null) { |
123 | throw new ReaderNotOpenException("Reader must be open before it can be read."); |
124 | // reader is now not null, or else an exception is thrown |
125 | } |
126 | return reader; |
127 | } |
128 | |
129 | /** |
130 | * Setter for resource property. The location of an input stream that can be |
131 | * read. |
132 | * |
133 | * @param resource |
134 | */ |
135 | public void setResource(Resource resource) { |
136 | this.resource = resource; |
137 | } |
138 | |
139 | /** |
140 | * Public setter for the recordSeparatorPolicy. Used to determine where the |
141 | * line endings are and do things like continue over a line ending if inside |
142 | * a quoted string. |
143 | * |
144 | * @param recordSeparatorPolicy the recordSeparatorPolicy to set |
145 | */ |
146 | public void setRecordSeparatorPolicy(RecordSeparatorPolicy recordSeparatorPolicy) { |
147 | this.recordSeparatorPolicy = recordSeparatorPolicy; |
148 | } |
149 | |
150 | /** |
151 | * Setter for comment prefixes. Can be used to ignore header lines as well |
152 | * by using e.g. the first couple of column names as a prefix. |
153 | * |
154 | * @param comments an array of comment line prefixes. |
155 | */ |
156 | public void setComments(String[] comments) { |
157 | this.comments = new String[comments.length]; |
158 | System.arraycopy(comments, 0, this.comments, 0, comments.length); |
159 | } |
160 | |
161 | /** |
162 | * Indicates whether first line is a header. If the tokenizer is an |
163 | * {@link AbstractLineTokenizer} and the column names haven't been set |
164 | * already then the header will be used to setup column names. Default is |
165 | * <code>false</code>. |
166 | */ |
167 | public void setFirstLineIsHeader(boolean firstLineIsHeader) { |
168 | this.firstLineIsHeader = firstLineIsHeader; |
169 | } |
170 | |
171 | /** |
172 | * @param lineTokenizer tokenizes each line from file into {@link FieldSet}. |
173 | */ |
174 | public void setLineTokenizer(LineTokenizer lineTokenizer) { |
175 | this.tokenizer = lineTokenizer; |
176 | } |
177 | |
178 | /** |
179 | * Set the FieldSetMapper to be used for each line. |
180 | * |
181 | * @param fieldSetMapper |
182 | */ |
183 | public void setFieldSetMapper(FieldSetMapper fieldSetMapper) { |
184 | this.fieldSetMapper = fieldSetMapper; |
185 | } |
186 | |
187 | /** |
188 | * Public setter for the number of lines to skip at the start of a file. Can |
189 | * be used if the file contains a header without useful (column name) |
190 | * information, and without a comment delimiter at the beginning of the |
191 | * lines. |
192 | * |
193 | * @param linesToSkip the number of lines to skip |
194 | */ |
195 | public void setLinesToSkip(int linesToSkip) { |
196 | this.linesToSkip = linesToSkip; |
197 | } |
198 | |
199 | /** |
200 | * Setter for the encoding for this input source. Default value is |
201 | * {@link #DEFAULT_CHARSET}. |
202 | * |
203 | * @param encoding a properties object which possibly contains the encoding |
204 | * for this input file; |
205 | */ |
206 | public void setEncoding(String encoding) { |
207 | this.encoding = encoding; |
208 | } |
209 | |
210 | public void afterPropertiesSet() throws Exception { |
211 | Assert.notNull(fieldSetMapper, "FieldSetMapper must not be null."); |
212 | } |
213 | |
214 | protected void doClose() throws Exception { |
215 | try { |
216 | if (reader != null) { |
217 | log.debug("Closing flat file for reading: " + resource); |
218 | reader.close(); |
219 | } |
220 | } |
221 | finally { |
222 | reader = null; |
223 | } |
224 | } |
225 | |
226 | protected void doOpen() throws Exception { |
227 | Assert.notNull(resource, "Input Resource must not be null"); |
228 | |
229 | noInput = false; |
230 | if (!resource.exists()) { |
231 | noInput = true; |
232 | log.warn("Input resource does not exist"); |
233 | return; |
234 | } |
235 | |
236 | log.debug("Opening flat file for reading: " + resource); |
237 | |
238 | if (this.reader == null) { |
239 | ResourceLineReader reader = new ResourceLineReader(resource, encoding); |
240 | if (recordSeparatorPolicy != null) { |
241 | reader.setRecordSeparatorPolicy(recordSeparatorPolicy); |
242 | } |
243 | if (comments != null) { |
244 | reader.setComments(comments); |
245 | } |
246 | reader.open(); |
247 | this.reader = reader; |
248 | } |
249 | |
250 | for (int i = 0; i < linesToSkip; i++) { |
251 | readLine(); |
252 | } |
253 | |
254 | if (firstLineIsHeader) { |
255 | // skip the header |
256 | String firstLine = readLine(); |
257 | // set names in tokenizer if they haven't been set already |
258 | if (tokenizer instanceof AbstractLineTokenizer && !((AbstractLineTokenizer) tokenizer).hasNames()) { |
259 | String[] names = tokenizer.tokenize(firstLine).getValues(); |
260 | ((AbstractLineTokenizer) tokenizer).setNames(names); |
261 | } |
262 | } |
263 | |
264 | } |
265 | |
266 | /** |
267 | * Reads a line from input, tokenizes is it using the |
268 | * {@link #setLineTokenizer(LineTokenizer)} and maps to domain object using |
269 | * {@link #setFieldSetMapper(FieldSetMapper)}. |
270 | * |
271 | * @see org.springframework.batch.item.ItemReader#read() |
272 | */ |
273 | protected Object doRead() throws Exception { |
274 | if (noInput) { |
275 | return null; |
276 | } |
277 | String line = readLine(); |
278 | |
279 | if (line != null) { |
280 | int lineCount = getReader().getPosition(); |
281 | try { |
282 | FieldSet tokenizedLine = tokenizer.tokenize(line); |
283 | return fieldSetMapper.mapLine(tokenizedLine); |
284 | } |
285 | catch (RuntimeException ex) { |
286 | // add current line count to message and re-throw |
287 | throw new FlatFileParseException("Parsing error at line: " + lineCount + " in resource=" |
288 | + resource.getDescription() + ", input=[" + line + "]", ex, line, lineCount); |
289 | } |
290 | } |
291 | return null; |
292 | } |
293 | |
294 | } |