| 1 | /* |
| 2 | * Copyright 2006-2012 the original author or authors. |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | package org.springframework.batch.item.file.transform; |
| 17 | |
| 18 | import java.util.ArrayList; |
| 19 | import java.util.Collections; |
| 20 | import java.util.List; |
| 21 | import java.util.regex.Matcher; |
| 22 | import java.util.regex.Pattern; |
| 23 | |
| 24 | import org.springframework.util.Assert; |
| 25 | |
| 26 | /** |
| 27 | * Line-tokenizer using a regular expression to filter out data (by using matching and non-matching groups). |
| 28 | * Consider the following regex which picks only the first and last name (notice the non-matching group in the middle): |
| 29 | * <pre> |
| 30 | * (.*?)(?: .*)* (.*) |
| 31 | * </pre> |
| 32 | * For the names: |
| 33 | * <ul> |
| 34 | * <li>"Graham James Edward Miller"</li> |
| 35 | * <li>"Andrew Gregory Macintyre"</li> |
| 36 | * <li>"No MiddleName"</li> |
| 37 | * </ul> |
| 38 | * |
| 39 | * the output will be: |
| 40 | * <ul> |
| 41 | * <li>"Miller", "Graham"</li> |
| 42 | * <li>"Macintyre", "Andrew"</li> |
| 43 | * <li>"MiddleName", "No"</li> |
| 44 | * </ul> |
| 45 | * |
| 46 | * An empty list is returned, in case of a non-match. |
| 47 | * |
| 48 | * @see Matcher#group(int) |
| 49 | * @author Costin Leau |
| 50 | */ |
| 51 | public class RegexLineTokenizer extends AbstractLineTokenizer { |
| 52 | |
| 53 | private Pattern pattern; |
| 54 | |
| 55 | @Override |
| 56 | protected List<String> doTokenize(String line) { |
| 57 | Matcher matcher = pattern.matcher(line); |
| 58 | boolean matchFound = matcher.find(); |
| 59 | |
| 60 | if (matchFound) { |
| 61 | List<String> tokens = new ArrayList<String>(matcher.groupCount()); |
| 62 | for (int i = 1; i <= matcher.groupCount(); i++) { |
| 63 | tokens.add(matcher.group(i)); |
| 64 | } |
| 65 | return tokens; |
| 66 | } |
| 67 | return Collections.emptyList(); |
| 68 | } |
| 69 | |
| 70 | /** |
| 71 | * Sets the regex pattern to use. |
| 72 | * |
| 73 | * @param pattern Regular Expression pattern |
| 74 | */ |
| 75 | public void setPattern(Pattern pattern) { |
| 76 | Assert.notNull(pattern, "a non-null pattern is required"); |
| 77 | this.pattern = pattern; |
| 78 | } |
| 79 | |
| 80 | /** |
| 81 | * Sets the regular expression to use. |
| 82 | * |
| 83 | * @param regex regular expression (as a String) |
| 84 | */ |
| 85 | public void setRegex(String regex) { |
| 86 | Assert.hasText(regex, "a valid regex is required"); |
| 87 | this.pattern = Pattern.compile(regex); |
| 88 | } |
| 89 | } |