1 | /* |
2 | * Copyright 2006-2007 the original author or authors. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | package org.springframework.batch.item.file.transform; |
18 | |
19 | import java.util.ArrayList; |
20 | import java.util.List; |
21 | |
22 | /** |
23 | * Tokenizer used to process data obtained from files with fixed-length format. |
24 | * Columns are specified by array of Range objects ({@link #setColumns(Range[])} |
25 | * ). |
26 | * |
27 | * @author tomas.slanina |
28 | * @author peter.zozom |
29 | * @author Dave Syer |
30 | * @author Lucas Ward |
31 | */ |
32 | public class FixedLengthTokenizer extends AbstractLineTokenizer { |
33 | |
34 | private Range[] ranges; |
35 | |
36 | private int maxRange = 0; |
37 | |
38 | boolean open = false; |
39 | |
40 | private boolean strict = true; |
41 | |
42 | /** |
43 | * Public setter for the strict flag. If true (the default) then lines must |
44 | * be precisely the length specified by the columns. If false then shorter |
45 | * lines will be tolerated and padded with empty columns, and longer strings |
46 | * will simply be truncated. |
47 | * |
48 | * @see #setColumns(Range[]) |
49 | * |
50 | * @param strict the strict to set |
51 | */ |
52 | public void setStrict(boolean strict) { |
53 | this.strict = strict; |
54 | } |
55 | |
56 | /** |
57 | * Set the column ranges. Used in conjunction with the |
58 | * {@link RangeArrayPropertyEditor} this property can be set in the form of |
59 | * a String describing the range boundaries, e.g. "1,4,7" or "1-3,4-6,7" or |
60 | * "1-2,4-5,7-10". If the last range is open then the rest of the line is |
61 | * read into that column (irrespective of the strict flag setting). |
62 | * |
63 | * @see #setStrict(boolean) |
64 | * |
65 | * @param ranges the column ranges expected in the input |
66 | */ |
67 | public void setColumns(Range[] ranges) { |
68 | this.ranges = ranges; |
69 | calculateMaxRange(ranges); |
70 | } |
71 | |
72 | /* |
73 | * Calculate the highest value within an array of ranges. The ranges aren't |
74 | * necessarily in order. For example: "5-10, 1-4,11-15". Furthermore, there |
75 | * isn't always a min and max, such as: "1,4-20, 22" |
76 | */ |
77 | private void calculateMaxRange(Range[] ranges) { |
78 | if (ranges == null || ranges.length == 0) { |
79 | maxRange = 0; |
80 | return; |
81 | } |
82 | |
83 | open = false; |
84 | maxRange = ranges[0].getMin(); |
85 | |
86 | for (int i = 0; i < ranges.length; i++) { |
87 | int upperBound; |
88 | if (ranges[i].hasMaxValue()) { |
89 | upperBound = ranges[i].getMax(); |
90 | } |
91 | else { |
92 | upperBound = ranges[i].getMin(); |
93 | if (upperBound > maxRange) { |
94 | open = true; |
95 | } |
96 | } |
97 | |
98 | if (upperBound > maxRange) { |
99 | maxRange = upperBound; |
100 | } |
101 | } |
102 | } |
103 | |
104 | /** |
105 | * Yields the tokens resulting from the splitting of the supplied |
106 | * <code>line</code>. |
107 | * |
108 | * @param line the line to be tokenised (can be <code>null</code>) |
109 | * |
110 | * @return the resulting tokens (empty if the line is null) |
111 | * @throws IncorrectLineLengthException if line length is greater than or |
112 | * less than the max range set. |
113 | */ |
114 | protected List doTokenize(String line) { |
115 | List tokens = new ArrayList(ranges.length); |
116 | int lineLength; |
117 | String token; |
118 | |
119 | lineLength = line.length(); |
120 | |
121 | if (lineLength < maxRange && strict) { |
122 | throw new IncorrectLineLengthException("Line is shorter than max range " + maxRange, maxRange, lineLength); |
123 | } |
124 | |
125 | if (!open && lineLength > maxRange && strict) { |
126 | throw new IncorrectLineLengthException("Line is longer than max range " + maxRange, maxRange, lineLength); |
127 | } |
128 | |
129 | for (int i = 0; i < ranges.length; i++) { |
130 | |
131 | int startPos = ranges[i].getMin() - 1; |
132 | int endPos = ranges[i].getMax(); |
133 | |
134 | if (lineLength >= endPos) { |
135 | token = line.substring(startPos, endPos); |
136 | } |
137 | else if (lineLength >= startPos) { |
138 | token = line.substring(startPos); |
139 | } |
140 | else { |
141 | token = ""; |
142 | } |
143 | |
144 | tokens.add(token); |
145 | } |
146 | |
147 | return tokens; |
148 | } |
149 | } |