1 | /* |
2 | * Copyright 2006-2007 the original author or authors. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | package org.springframework.batch.item.file.transform; |
18 | |
19 | import java.util.ArrayList; |
20 | import java.util.Arrays; |
21 | import java.util.List; |
22 | |
23 | /** |
24 | * Tokenizer used to process data obtained from files with fixed-length format. |
25 | * Columns are specified by array of Range objects ({@link #setColumns(Range[])} |
26 | * ). |
27 | * |
28 | * @author tomas.slanina |
29 | * @author peter.zozom |
30 | * @author Dave Syer |
31 | * @author Lucas Ward |
32 | */ |
33 | public class FixedLengthTokenizer extends AbstractLineTokenizer { |
34 | |
35 | private Range[] ranges; |
36 | |
37 | private int maxRange = 0; |
38 | |
39 | boolean open = false; |
40 | |
41 | /** |
42 | * Set the column ranges. Used in conjunction with the |
43 | * {@link RangeArrayPropertyEditor} this property can be set in the form of |
44 | * a String describing the range boundaries, e.g. "1,4,7" or "1-3,4-6,7" or |
45 | * "1-2,4-5,7-10". If the last range is open then the rest of the line is |
46 | * read into that column (irrespective of the strict flag setting). |
47 | * |
48 | * @see #setStrict(boolean) |
49 | * |
50 | * @param ranges the column ranges expected in the input |
51 | */ |
52 | public void setColumns(Range[] ranges) { |
53 | this.ranges = Arrays.asList(ranges).toArray(new Range[ranges.length]); |
54 | calculateMaxRange(ranges); |
55 | } |
56 | |
57 | /* |
58 | * Calculate the highest value within an array of ranges. The ranges aren't |
59 | * necessarily in order. For example: "5-10, 1-4,11-15". Furthermore, there |
60 | * isn't always a min and max, such as: "1,4-20, 22" |
61 | */ |
62 | private void calculateMaxRange(Range[] ranges) { |
63 | if (ranges == null || ranges.length == 0) { |
64 | maxRange = 0; |
65 | return; |
66 | } |
67 | |
68 | open = false; |
69 | maxRange = ranges[0].getMin(); |
70 | |
71 | for (int i = 0; i < ranges.length; i++) { |
72 | int upperBound; |
73 | if (ranges[i].hasMaxValue()) { |
74 | upperBound = ranges[i].getMax(); |
75 | } |
76 | else { |
77 | upperBound = ranges[i].getMin(); |
78 | if (upperBound > maxRange) { |
79 | open = true; |
80 | } |
81 | } |
82 | |
83 | if (upperBound > maxRange) { |
84 | maxRange = upperBound; |
85 | } |
86 | } |
87 | } |
88 | |
89 | /** |
90 | * Yields the tokens resulting from the splitting of the supplied |
91 | * <code>line</code>. |
92 | * |
93 | * @param line the line to be tokenised (can be <code>null</code>) |
94 | * |
95 | * @return the resulting tokens (empty if the line is null) |
96 | * @throws IncorrectLineLengthException if line length is greater than or |
97 | * less than the max range set. |
98 | */ |
99 | protected List<String> doTokenize(String line) { |
100 | List<String> tokens = new ArrayList<String>(ranges.length); |
101 | int lineLength; |
102 | String token; |
103 | |
104 | lineLength = line.length(); |
105 | |
106 | if (lineLength < maxRange && isStrict()) { |
107 | throw new IncorrectLineLengthException("Line is shorter than max range " + maxRange, maxRange, lineLength); |
108 | } |
109 | |
110 | if (!open && lineLength > maxRange && isStrict()) { |
111 | throw new IncorrectLineLengthException("Line is longer than max range " + maxRange, maxRange, lineLength); |
112 | } |
113 | |
114 | for (int i = 0; i < ranges.length; i++) { |
115 | |
116 | int startPos = ranges[i].getMin() - 1; |
117 | int endPos = ranges[i].getMax(); |
118 | |
119 | if (lineLength >= endPos) { |
120 | token = line.substring(startPos, endPos); |
121 | } |
122 | else if (lineLength >= startPos) { |
123 | token = line.substring(startPos); |
124 | } |
125 | else { |
126 | token = ""; |
127 | } |
128 | |
129 | tokens.add(token); |
130 | } |
131 | |
132 | return tokens; |
133 | } |
134 | } |