| 1 | /* |
| 2 | * Copyright 2006-2007 the original author or authors. |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | package org.springframework.batch.item.file; |
| 18 | |
| 19 | import org.apache.commons.logging.Log; |
| 20 | import org.apache.commons.logging.LogFactory; |
| 21 | import org.springframework.batch.item.ItemReader; |
| 22 | import org.springframework.batch.item.ItemReaderException; |
| 23 | import org.springframework.batch.item.ItemStreamException; |
| 24 | import org.springframework.batch.item.ReaderNotOpenException; |
| 25 | import org.springframework.batch.item.file.mapping.FieldSet; |
| 26 | import org.springframework.batch.item.file.mapping.FieldSetMapper; |
| 27 | import org.springframework.batch.item.file.separator.LineReader; |
| 28 | import org.springframework.batch.item.file.separator.RecordSeparatorPolicy; |
| 29 | import org.springframework.batch.item.file.separator.ResourceLineReader; |
| 30 | import org.springframework.batch.item.file.transform.AbstractLineTokenizer; |
| 31 | import org.springframework.batch.item.file.transform.DelimitedLineTokenizer; |
| 32 | import org.springframework.batch.item.file.transform.LineTokenizer; |
| 33 | import org.springframework.batch.item.support.AbstractBufferedItemReaderItemStream; |
| 34 | import org.springframework.beans.factory.InitializingBean; |
| 35 | import org.springframework.core.io.Resource; |
| 36 | import org.springframework.util.Assert; |
| 37 | import org.springframework.util.ClassUtils; |
| 38 | |
| 39 | /** |
| 40 | * This class represents a {@link ItemReader}, that reads lines from text file, |
| 41 | * tokenizes them to structured tuples ({@link FieldSet}s) instances and maps |
| 42 | * the {@link FieldSet}s to domain objects. The location of the file is defined |
| 43 | * by the resource property. To separate the structure of the file, |
| 44 | * {@link LineTokenizer} is used to parse data obtained from the file. <br/> |
| 45 | * |
| 46 | * A {@link FlatFileItemReader} is not thread safe because it maintains state in |
| 47 | * the form of a {@link ResourceLineReader}. Be careful to configure a |
| 48 | * {@link FlatFileItemReader} using an appropriate factory or scope so that it |
| 49 | * is not shared between threads.<br/> |
| 50 | * |
| 51 | * <p> |
| 52 | * This class supports restart, skipping invalid lines and storing statistics. |
| 53 | * It can be configured to setup {@link FieldSet} column names from the file |
| 54 | * header, skip given number of lines at the beginning of the file. |
| 55 | * </p> |
| 56 | * |
| 57 | * The implementation is *not* thread-safe. |
| 58 | * |
| 59 | * @author Waseem Malik |
| 60 | * @author Tomas Slanina |
| 61 | * @author Robert Kasanicky |
| 62 | * @author Dave Syer |
| 63 | */ |
| 64 | public class FlatFileItemReader extends AbstractBufferedItemReaderItemStream implements |
| 65 | ResourceAwareItemReaderItemStream, InitializingBean { |
| 66 | |
| 67 | private static Log log = LogFactory.getLog(FlatFileItemReader.class); |
| 68 | |
| 69 | // default encoding for input files |
| 70 | public static final String DEFAULT_CHARSET = "ISO-8859-1"; |
| 71 | |
| 72 | private String encoding = DEFAULT_CHARSET; |
| 73 | |
| 74 | private Resource resource; |
| 75 | |
| 76 | private RecordSeparatorPolicy recordSeparatorPolicy; |
| 77 | |
| 78 | private String[] comments; |
| 79 | |
| 80 | private int linesToSkip = 0; |
| 81 | |
| 82 | private boolean firstLineIsHeader = false; |
| 83 | |
| 84 | private LineTokenizer tokenizer = new DelimitedLineTokenizer(); |
| 85 | |
| 86 | private FieldSetMapper fieldSetMapper; |
| 87 | |
| 88 | /** |
| 89 | * Encapsulates the state of the input source. If it is null then we are |
| 90 | * uninitialized. |
| 91 | */ |
| 92 | private LineReader reader; |
| 93 | |
| 94 | private boolean noInput = false; |
| 95 | |
| 96 | public FlatFileItemReader() { |
| 97 | setName(ClassUtils.getShortName(FlatFileItemReader.class)); |
| 98 | } |
| 99 | |
| 100 | /** |
| 101 | * @return next line to be tokenized and mapped. |
| 102 | */ |
| 103 | private String readLine() { |
| 104 | try { |
| 105 | return (String) getReader().read(); |
| 106 | } |
| 107 | catch (ItemStreamException e) { |
| 108 | throw e; |
| 109 | } |
| 110 | catch (ItemReaderException e) { |
| 111 | throw e; |
| 112 | } |
| 113 | catch (Exception e) { |
| 114 | throw new IllegalStateException(); |
| 115 | } |
| 116 | } |
| 117 | |
| 118 | /** |
| 119 | * @return line reader used to read input file |
| 120 | */ |
| 121 | protected LineReader getReader() { |
| 122 | if (reader == null) { |
| 123 | throw new ReaderNotOpenException("Reader must be open before it can be read."); |
| 124 | // reader is now not null, or else an exception is thrown |
| 125 | } |
| 126 | return reader; |
| 127 | } |
| 128 | |
| 129 | /** |
| 130 | * Setter for resource property. The location of an input stream that can be |
| 131 | * read. |
| 132 | * |
| 133 | * @param resource |
| 134 | */ |
| 135 | public void setResource(Resource resource) { |
| 136 | this.resource = resource; |
| 137 | } |
| 138 | |
| 139 | /** |
| 140 | * Public setter for the recordSeparatorPolicy. Used to determine where the |
| 141 | * line endings are and do things like continue over a line ending if inside |
| 142 | * a quoted string. |
| 143 | * |
| 144 | * @param recordSeparatorPolicy the recordSeparatorPolicy to set |
| 145 | */ |
| 146 | public void setRecordSeparatorPolicy(RecordSeparatorPolicy recordSeparatorPolicy) { |
| 147 | this.recordSeparatorPolicy = recordSeparatorPolicy; |
| 148 | } |
| 149 | |
| 150 | /** |
| 151 | * Setter for comment prefixes. Can be used to ignore header lines as well |
| 152 | * by using e.g. the first couple of column names as a prefix. |
| 153 | * |
| 154 | * @param comments an array of comment line prefixes. |
| 155 | */ |
| 156 | public void setComments(String[] comments) { |
| 157 | this.comments = new String[comments.length]; |
| 158 | System.arraycopy(comments, 0, this.comments, 0, comments.length); |
| 159 | } |
| 160 | |
| 161 | /** |
| 162 | * Indicates whether first line is a header. If the tokenizer is an |
| 163 | * {@link AbstractLineTokenizer} and the column names haven't been set |
| 164 | * already then the header will be used to setup column names. Default is |
| 165 | * <code>false</code>. |
| 166 | */ |
| 167 | public void setFirstLineIsHeader(boolean firstLineIsHeader) { |
| 168 | this.firstLineIsHeader = firstLineIsHeader; |
| 169 | } |
| 170 | |
| 171 | /** |
| 172 | * @param lineTokenizer tokenizes each line from file into {@link FieldSet}. |
| 173 | */ |
| 174 | public void setLineTokenizer(LineTokenizer lineTokenizer) { |
| 175 | this.tokenizer = lineTokenizer; |
| 176 | } |
| 177 | |
| 178 | /** |
| 179 | * Set the FieldSetMapper to be used for each line. |
| 180 | * |
| 181 | * @param fieldSetMapper |
| 182 | */ |
| 183 | public void setFieldSetMapper(FieldSetMapper fieldSetMapper) { |
| 184 | this.fieldSetMapper = fieldSetMapper; |
| 185 | } |
| 186 | |
| 187 | /** |
| 188 | * Public setter for the number of lines to skip at the start of a file. Can |
| 189 | * be used if the file contains a header without useful (column name) |
| 190 | * information, and without a comment delimiter at the beginning of the |
| 191 | * lines. |
| 192 | * |
| 193 | * @param linesToSkip the number of lines to skip |
| 194 | */ |
| 195 | public void setLinesToSkip(int linesToSkip) { |
| 196 | this.linesToSkip = linesToSkip; |
| 197 | } |
| 198 | |
| 199 | /** |
| 200 | * Setter for the encoding for this input source. Default value is |
| 201 | * {@link #DEFAULT_CHARSET}. |
| 202 | * |
| 203 | * @param encoding a properties object which possibly contains the encoding |
| 204 | * for this input file; |
| 205 | */ |
| 206 | public void setEncoding(String encoding) { |
| 207 | this.encoding = encoding; |
| 208 | } |
| 209 | |
| 210 | public void afterPropertiesSet() throws Exception { |
| 211 | Assert.notNull(fieldSetMapper, "FieldSetMapper must not be null."); |
| 212 | } |
| 213 | |
| 214 | protected void doClose() throws Exception { |
| 215 | try { |
| 216 | if (reader != null) { |
| 217 | log.debug("Closing flat file for reading: " + resource); |
| 218 | reader.close(); |
| 219 | } |
| 220 | } |
| 221 | finally { |
| 222 | reader = null; |
| 223 | } |
| 224 | } |
| 225 | |
| 226 | protected void doOpen() throws Exception { |
| 227 | Assert.notNull(resource, "Input Resource must not be null"); |
| 228 | |
| 229 | noInput = false; |
| 230 | if (!resource.exists()) { |
| 231 | noInput = true; |
| 232 | log.warn("Input resource does not exist"); |
| 233 | return; |
| 234 | } |
| 235 | |
| 236 | log.debug("Opening flat file for reading: " + resource); |
| 237 | |
| 238 | if (this.reader == null) { |
| 239 | ResourceLineReader reader = new ResourceLineReader(resource, encoding); |
| 240 | if (recordSeparatorPolicy != null) { |
| 241 | reader.setRecordSeparatorPolicy(recordSeparatorPolicy); |
| 242 | } |
| 243 | if (comments != null) { |
| 244 | reader.setComments(comments); |
| 245 | } |
| 246 | reader.open(); |
| 247 | this.reader = reader; |
| 248 | } |
| 249 | |
| 250 | for (int i = 0; i < linesToSkip; i++) { |
| 251 | readLine(); |
| 252 | } |
| 253 | |
| 254 | if (firstLineIsHeader) { |
| 255 | // skip the header |
| 256 | String firstLine = readLine(); |
| 257 | // set names in tokenizer if they haven't been set already |
| 258 | if (tokenizer instanceof AbstractLineTokenizer && !((AbstractLineTokenizer) tokenizer).hasNames()) { |
| 259 | String[] names = tokenizer.tokenize(firstLine).getValues(); |
| 260 | ((AbstractLineTokenizer) tokenizer).setNames(names); |
| 261 | } |
| 262 | } |
| 263 | |
| 264 | } |
| 265 | |
| 266 | /** |
| 267 | * Reads a line from input, tokenizes is it using the |
| 268 | * {@link #setLineTokenizer(LineTokenizer)} and maps to domain object using |
| 269 | * {@link #setFieldSetMapper(FieldSetMapper)}. |
| 270 | * |
| 271 | * @see org.springframework.batch.item.ItemReader#read() |
| 272 | */ |
| 273 | protected Object doRead() throws Exception { |
| 274 | if (noInput) { |
| 275 | return null; |
| 276 | } |
| 277 | String line = readLine(); |
| 278 | |
| 279 | if (line != null) { |
| 280 | int lineCount = getReader().getPosition(); |
| 281 | try { |
| 282 | FieldSet tokenizedLine = tokenizer.tokenize(line); |
| 283 | return fieldSetMapper.mapLine(tokenizedLine); |
| 284 | } |
| 285 | catch (RuntimeException ex) { |
| 286 | // add current line count to message and re-throw |
| 287 | throw new FlatFileParseException("Parsing error at line: " + lineCount + " in resource=" |
| 288 | + resource.getDescription() + ", input=[" + line + "]", ex, line, lineCount); |
| 289 | } |
| 290 | } |
| 291 | return null; |
| 292 | } |
| 293 | |
| 294 | } |