1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.argeo.util;
17
18 import java.io.BufferedReader;
19 import java.io.IOException;
20 import java.io.InputStream;
21 import java.io.InputStreamReader;
22 import java.util.ArrayList;
23 import java.util.Collections;
24 import java.util.List;
25
26
27
28
29
30
31 public abstract class CsvParser {
32 private char separator = ',';
33 private char quote = '\"';
34
35 private Boolean noHeader = false;
36 private Boolean strictLineAsLongAsHeader = true;
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52 protected abstract void processLine(Integer lineNumber,
53 List<String> header, List<String> tokens);
54
55
56
57
58 public synchronized void parse(InputStream in) {
59 parse(in, null);
60 }
61
62
63
64
65 public synchronized void parse(InputStream in, String encoding) {
66 BufferedReader reader = null;
67 Integer lineCount = 0;
68 try {
69 if (encoding == null)
70 reader = new BufferedReader(new InputStreamReader(in));
71 else
72 reader = new BufferedReader(new InputStreamReader(in, encoding));
73 List<String> header = null;
74 if (!noHeader) {
75 String headerStr = reader.readLine();
76 if (headerStr == null)
77 return;
78 lineCount++;
79 header = new ArrayList<String>();
80 StringBuffer currStr = new StringBuffer("");
81 Boolean wasInquote = false;
82 while (parseLine(headerStr, header, currStr, wasInquote)) {
83 headerStr = reader.readLine();
84 if (headerStr == null)
85 break;
86 wasInquote = true;
87 }
88 header = Collections.unmodifiableList(header);
89 }
90
91 String line = null;
92 lines: while ((line = reader.readLine()) != null) {
93 line = preProcessLine(line);
94 if (line == null) {
95
96 continue lines;
97 }
98 lineCount++;
99 List<String> tokens = new ArrayList<String>();
100 StringBuffer currStr = new StringBuffer("");
101 Boolean wasInquote = false;
102 sublines: while (parseLine(line, tokens, currStr, wasInquote)) {
103 line = reader.readLine();
104 if (line == null)
105 break sublines;
106 wasInquote = true;
107 }
108 if (!noHeader && strictLineAsLongAsHeader) {
109 int headerSize = header.size();
110 int tokenSize = tokens.size();
111 if (tokenSize == 1 && line.trim().equals(""))
112 continue lines;
113 if (headerSize != tokenSize) {
114 throw new UtilsException("Token size " + tokenSize
115 + " is different from header size "
116 + headerSize + " at line " + lineCount
117 + ", line: " + line + ", header: " + header
118 + ", tokens: " + tokens);
119 }
120 }
121 processLine(lineCount, header, tokens);
122 }
123 } catch (UtilsException e) {
124 throw e;
125 } catch (IOException e) {
126 throw new UtilsException("Cannot parse CSV file (line: "
127 + lineCount + ")", e);
128 } finally {
129 StreamUtils.closeQuietly(reader);
130 }
131 }
132
133
134
135
136
137
138 protected String preProcessLine(String line) {
139 return line;
140 }
141
142
143
144
145
146
147 protected Boolean parseLine(String str, List<String> tokens,
148 StringBuffer currStr, Boolean wasInquote) {
149
150
151
152
153 if (wasInquote)
154 currStr.append('\n');
155
156 char[] arr = str.toCharArray();
157 boolean inQuote = wasInquote;
158
159 for (int i = 0; i < arr.length; i++) {
160 char c = arr[i];
161 if (c == separator) {
162 if (!inQuote) {
163 tokens.add(currStr.toString());
164
165 currStr.delete(0, currStr.length());
166 } else {
167
168
169
170 currStr.append(c);
171 }
172 } else if (c == quote) {
173 if (inQuote && (i + 1) < arr.length && arr[i + 1] == quote) {
174
175 currStr.append(quote);
176 i++;
177 } else {
178 inQuote = inQuote ? false : true;
179 }
180 } else {
181 currStr.append(c);
182 }
183 }
184
185 if (!inQuote) {
186 tokens.add(currStr.toString());
187
188 }
189
190
191
192 if (inQuote)
193 return true;
194 else
195 return false;
196
197 }
198
199 public char getSeparator() {
200 return separator;
201 }
202
203 public synchronized void setSeparator(char separator) {
204 this.separator = separator;
205 }
206
207 public char getQuote() {
208 return quote;
209 }
210
211 public synchronized void setQuote(char quote) {
212 this.quote = quote;
213 }
214
215 public Boolean getNoHeader() {
216 return noHeader;
217 }
218
219 public synchronized void setNoHeader(Boolean noHeader) {
220 this.noHeader = noHeader;
221 }
222
223 public Boolean getStrictLineAsLongAsHeader() {
224 return strictLineAsLongAsHeader;
225 }
226
227 public synchronized void setStrictLineAsLongAsHeader(
228 Boolean strictLineAsLongAsHeader) {
229 this.strictLineAsLongAsHeader = strictLineAsLongAsHeader;
230 }
231
232 }