1 | /* |
2 | * Copyright 2006-2014 the original author or authors. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | package org.springframework.batch.item.file.transform; |
18 | |
19 | import java.util.ArrayList; |
20 | import java.util.Arrays; |
21 | import java.util.List; |
22 | |
23 | /** |
24 | * Tokenizer used to process data obtained from files with fixed-length format. |
25 | * Columns are specified by array of Range objects ({@link #setColumns(Range[])} |
26 | * ). |
27 | * |
28 | * @author tomas.slanina |
29 | * @author peter.zozom |
30 | * @author Dave Syer |
31 | * @author Lucas Ward |
32 | * @author Michael Minella |
33 | */ |
34 | public class FixedLengthTokenizer extends AbstractLineTokenizer { |
35 | |
36 | private Range[] ranges; |
37 | |
38 | private int maxRange = 0; |
39 | |
40 | boolean open = false; |
41 | |
42 | /** |
43 | * Set the column ranges. Used in conjunction with the |
44 | * {@link RangeArrayPropertyEditor} this property can be set in the form of |
45 | * a String describing the range boundaries, e.g. "1,4,7" or "1-3,4-6,7" or |
46 | * "1-2,4-5,7-10". If the last range is open then the rest of the line is |
47 | * read into that column (irrespective of the strict flag setting). |
48 | * |
49 | * @see #setStrict(boolean) |
50 | * |
51 | * @param ranges the column ranges expected in the input |
52 | */ |
53 | public void setColumns(Range[] ranges) { |
54 | this.ranges = Arrays.asList(ranges).toArray(new Range[ranges.length]); |
55 | calculateMaxRange(ranges); |
56 | } |
57 | |
58 | /* |
59 | * Calculate the highest value within an array of ranges. The ranges aren't |
60 | * necessarily in order. For example: "5-10, 1-4,11-15". Furthermore, there |
61 | * isn't always a min and max, such as: "1,4-20, 22" |
62 | */ |
63 | private void calculateMaxRange(Range[] ranges) { |
64 | if (ranges == null || ranges.length == 0) { |
65 | maxRange = 0; |
66 | return; |
67 | } |
68 | |
69 | open = false; |
70 | maxRange = ranges[0].getMin(); |
71 | |
72 | for (int i = 0; i < ranges.length; i++) { |
73 | int upperBound; |
74 | if (ranges[i].hasMaxValue()) { |
75 | upperBound = ranges[i].getMax(); |
76 | } |
77 | else { |
78 | upperBound = ranges[i].getMin(); |
79 | if (upperBound > maxRange) { |
80 | open = true; |
81 | } |
82 | } |
83 | |
84 | if (upperBound > maxRange) { |
85 | maxRange = upperBound; |
86 | } |
87 | } |
88 | } |
89 | |
90 | /** |
91 | * Yields the tokens resulting from the splitting of the supplied |
92 | * <code>line</code>. |
93 | * |
94 | * @param line the line to be tokenised (can be <code>null</code>) |
95 | * |
96 | * @return the resulting tokens (empty if the line is null) |
97 | * @throws IncorrectLineLengthException if line length is greater than or |
98 | * less than the max range set. |
99 | */ |
100 | @Override |
101 | protected List<String> doTokenize(String line) { |
102 | List<String> tokens = new ArrayList<String>(ranges.length); |
103 | int lineLength; |
104 | String token; |
105 | |
106 | lineLength = line.length(); |
107 | |
108 | if (lineLength < maxRange && isStrict()) { |
109 | throw new IncorrectLineLengthException("Line is shorter than max range " + maxRange, maxRange, lineLength, line); |
110 | } |
111 | |
112 | if (!open && lineLength > maxRange && isStrict()) { |
113 | throw new IncorrectLineLengthException("Line is longer than max range " + maxRange, maxRange, lineLength, line); |
114 | } |
115 | |
116 | for (int i = 0; i < ranges.length; i++) { |
117 | |
118 | int startPos = ranges[i].getMin() - 1; |
119 | int endPos = ranges[i].getMax(); |
120 | |
121 | if (lineLength >= endPos) { |
122 | token = line.substring(startPos, endPos); |
123 | } |
124 | else if (lineLength >= startPos) { |
125 | token = line.substring(startPos); |
126 | } |
127 | else { |
128 | token = ""; |
129 | } |
130 | |
131 | tokens.add(token); |
132 | } |
133 | |
134 | return tokens; |
135 | } |
136 | } |