1 | /* |
2 | * Copyright 2006-2012 the original author or authors. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | package org.springframework.batch.item.file.transform; |
17 | |
18 | import java.util.ArrayList; |
19 | import java.util.Collections; |
20 | import java.util.List; |
21 | import java.util.regex.Matcher; |
22 | import java.util.regex.Pattern; |
23 | |
24 | import org.springframework.util.Assert; |
25 | |
26 | /** |
27 | * Line-tokenizer using a regular expression to filter out data (by using capturing and non-capturing groups). |
28 | * Consider the following regex which picks only the first and last name (notice the non-capturing group in the middle): |
29 | * <pre> |
30 | * (.*?)(?: .*)* (.*) |
31 | * </pre> |
32 | * For the names: |
33 | * <ul> |
34 | * <li>"Graham James Edward Miller"</li> |
35 | * <li>"Andrew Gregory Macintyre"</li> |
36 | * <li>"No MiddleName"</li> |
37 | * </ul> |
38 | * |
39 | * the output will be: |
40 | * <ul> |
41 | * <li>"Graham", "Miller"</li> |
42 | * <li>"Andrew", "Macintyre"</li> |
43 | * <li>"No", "MiddleName"</li> |
44 | * </ul> |
45 | * |
46 | * An empty list is returned, in case of a non-match. |
47 | * |
48 | * @see Matcher#group(int) |
49 | * @author Costin Leau |
50 | */ |
51 | public class RegexLineTokenizer extends AbstractLineTokenizer { |
52 | |
53 | private Pattern pattern; |
54 | |
55 | @Override |
56 | protected List<String> doTokenize(String line) { |
57 | Matcher matcher = pattern.matcher(line); |
58 | boolean matchFound = matcher.find(); |
59 | |
60 | if (matchFound) { |
61 | List<String> tokens = new ArrayList<String>(matcher.groupCount()); |
62 | for (int i = 1; i <= matcher.groupCount(); i++) { |
63 | tokens.add(matcher.group(i)); |
64 | } |
65 | return tokens; |
66 | } |
67 | return Collections.emptyList(); |
68 | } |
69 | |
70 | /** |
71 | * Sets the regex pattern to use. |
72 | * |
73 | * @param pattern Regular Expression pattern |
74 | */ |
75 | public void setPattern(Pattern pattern) { |
76 | Assert.notNull(pattern, "a non-null pattern is required"); |
77 | this.pattern = pattern; |
78 | } |
79 | |
80 | /** |
81 | * Sets the regular expression to use. |
82 | * |
83 | * @param regex regular expression (as a String) |
84 | */ |
85 | public void setRegex(String regex) { |
86 | Assert.hasText(regex, "a valid regex is required"); |
87 | this.pattern = Pattern.compile(regex); |
88 | } |
89 | } |