View Javadoc

1   /*
2    * Copyright 2006-2012 the original author or authors.
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.springframework.batch.item.file.transform;
17  
18  import java.util.ArrayList;
19  import java.util.Collections;
20  import java.util.List;
21  import java.util.regex.Matcher;
22  import java.util.regex.Pattern;
23  
24  import org.springframework.util.Assert;
25  
26  /**
27   * Line-tokenizer using a regular expression to filter out data (by using matching and non-matching groups).
28   * Consider the following regex which picks only the first and last name (notice the non-matching group in the middle):
29   * <pre>
30   * (.*?)(?: .*)* (.*) 
31   * </pre>
32   * For the names:
33   * <ul>  
34   *  <li>"Graham James Edward Miller"</li>
35   *  <li>"Andrew Gregory Macintyre"</li>
36   *  <li>"No MiddleName"</li>
37   * </ul> 
38   * 
39   * the output will be:
40   * <ul>
41   * <li>"Miller", "Graham"</li>
42   * <li>"Macintyre", "Andrew"</li>
43   * <li>"MiddleName", "No"</li>
44   * </ul>
45   * 
46   * An empty list is returned, in case of a non-match.
47   * 
48   * @see Matcher#group(int)
49   * @author Costin Leau
50   */
51  public class RegexLineTokenizer extends AbstractLineTokenizer {
52  
53  	private Pattern pattern;
54  
55  	@Override
56  	protected List<String> doTokenize(String line) {
57  		Matcher matcher = pattern.matcher(line);
58  		boolean matchFound = matcher.find();
59  
60  		if (matchFound) {
61  			List<String> tokens = new ArrayList<String>(matcher.groupCount());
62  			for (int i = 1; i <= matcher.groupCount(); i++) {
63  				tokens.add(matcher.group(i));
64  			}
65  			return tokens;
66  		}
67  		return Collections.emptyList();
68  	}
69  
70  	/**
71  	 * Sets the regex pattern to use.
72  	 * 
73  	 * @param pattern Regular Expression pattern
74  	 */
75  	public void setPattern(Pattern pattern) {
76  		Assert.notNull(pattern, "a non-null pattern is required");
77  		this.pattern = pattern;
78  	}
79  
80  	/**
81  	 * Sets the regular expression to use. 
82  	 * 
83  	 * @param regex regular expression (as a String)
84  	 */
85  	public void setRegex(String regex) {
86  		Assert.hasText(regex, "a valid regex is required");
87  		this.pattern = Pattern.compile(regex);
88  	}
89  }