View Javadoc
1   /*
2    * Copyright (C) 2007-2012 Argeo GmbH
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *         http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.argeo.slc.diff;
17  
18  import java.util.ArrayList;
19  import java.util.List;
20  
/**
 * Splits a string into a list of tokens around a single separator character,
 * keeping one entry for every field even when the field is empty. It is meant
 * as a replacement for the standard StringTokenizer, which skips empty values.
 * Unlike StringTokenizer, the whole input is parsed eagerly in the constructor
 * and the resulting tokens are stored, so this class should not be used on
 * long strings. Tokens are built as new String instances rather than by
 * storing the constructor argument itself.
 */
public class LineTokenizer {
	private final List<String> tokens;

	/**
	 * Complete constructor: parses the input immediately and stores the
	 * resulting tokens.
	 * 
	 * @param stringToParse
	 *            the string to split
	 * @param separator
	 *            the character separating two values
	 * @param noValueString
	 *            the token to use in place of a missing (empty) value
	 */
	public LineTokenizer(String stringToParse, Character separator,
			String noValueString) {
		this.tokens = parse(stringToParse, separator, noValueString);
	}

	/**
	 * Parses the string as a list of strings. Can be overridden in order to
	 * provide another implementation.
	 * <p>
	 * Each separator closes a token; an empty token (leading separator,
	 * trailing separator or two consecutive separators) is replaced by a copy
	 * of the no value string. An input which does not contain the separator at
	 * all is returned verbatim as a single token, even if it is empty.
	 * 
	 * @param stringToParse
	 *            the string to split
	 * @param separator
	 *            the character separating two values
	 * @param noValueString
	 *            the token to use in place of a missing (empty) value
	 * @return the tokens, in order of appearance; always at least one element
	 */
	protected List<String> parse(final String stringToParse,
			final char separator, final String noValueString) {
		final char[] chars = stringToParse.toCharArray();
		final List<String> result = new ArrayList<String>();

		// Index of the first character of the token currently being read
		int tokenStart = 0;
		boolean separatorSeen = false;

		for (int pos = 0; pos < chars.length; pos++) {
			if (chars[pos] != separator) {
				continue;
			}
			// A separator closes the token spanning [tokenStart, pos)
			result.add(tokenOrNoValue(chars, tokenStart, pos, noValueString));
			tokenStart = pos + 1;
			separatorSeen = true;
		}

		if (separatorSeen) {
			// Whatever follows the last separator (possibly nothing)
			result.add(tokenOrNoValue(chars, tokenStart, chars.length,
					noValueString));
		} else {
			// No separator at all: the whole input is the only token
			result.add(new String(stringToParse));
		}
		return result;
	}

	/**
	 * Builds the token made of the characters in [from, to), or a copy of the
	 * no value string when that range is empty.
	 */
	private static String tokenOrNoValue(char[] chars, int from, int to,
			String noValueString) {
		if (from == to) {
			return new String(noValueString);
		}
		return new String(chars, from, to - from);
	}

	/** The tokens computed when this object was constructed. */
	public List<String> getTokens() {
		return tokens;
	}

	/**
	 * Parses a string in one call, without having to instantiate a tokenizer
	 * explicitly.
	 * 
	 * @param stringToParse
	 *            the string to split
	 * @param separator
	 *            the character separating two values
	 * @param noValueString
	 *            the token to use in place of a missing (empty) value
	 * @return the tokens, in order of appearance
	 */
	public static List<String> tokenize(String stringToParse,
			Character separator, String noValueString) {
		return new LineTokenizer(stringToParse, separator, noValueString)
				.getTokens();
	}

	/** Parses a string, using the empty string as no value string. */
	public static List<String> tokenize(String stringToParse,
			Character separator) {
		return tokenize(stringToParse, separator, "");
	}

}
111 }