-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathTokenizer.cs
More file actions
323 lines (294 loc) · 13.9 KB
/
Tokenizer.cs
File metadata and controls
323 lines (294 loc) · 13.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
/*
* Created by SharpDevelop.
* User: User
* Date: 14/10/2011
* Time: 22:33
*
* To change this template use Tools | Options | Coding | Edit Standard Headers.
*/
#region Using Directives
using System;
using System.Collections.Generic;
#endregion
/*
* $author: QmQ
* $source: http://www.codeproject.com/useritems/SimpleStringTokenizer.asp
* $date: 10-June-2006
*/
namespace JavaConvert.Data
{
#region [Summary and remarks]
/// <summary>
/// Implements a StringTokenizer class for splitting a string
/// into substrings using a set of delimiters.
/// </summary>
/// <remarks>
/// C# version of the java.util.StringTokenizer class.
/// Basically it is a wrapper class around the <c>String.Split</c> method.<para/>
/// It implements all of its Java equivalent's methods apart from those only needed by the Enumeration interface.
/// All implemented Java-compliant methods have their C# equivalents in properties. They however differ in names,
/// since Java uses camelCase method names such as <c>runMe()</c> while C# uses PascalCase names such as <c>RunMe()</c>; thus
/// Java's <c>nextToken()</c> method corresponds to the <c>NextToken</c> property.
/// </remarks>
#endregion
public class StringTokenizer : IEnumerable<string>
{
    /// <summary>
    /// String containing the default set of delimiters which are <c>" \t\n\r\f"</c>:
    /// the space character, the tab character, the newline character, the carriage-return character, and the form-feed character.
    /// </summary>
    public const string DefaultDelimiters = " \t\n\r\f";

    // Delimiter characters in effect for this instance (each char is a separate delimiter).
    private readonly string delims = DefaultDelimiters;

    // Placeholder substituted for empty tokens when they are returned.
    private readonly string empty = String.Empty;

    // All tokens, extracted eagerly by the constructor.
    private readonly string[] tokens;

    // Cursor shared by NextToken, HasMoreTokens, CountTokens and the enumerator.
    private int index = 0;

    #region [Constructors]
    /// <summary>
    /// Constructs a string tokenizer for the specified string using the <see cref="DefaultDelimiters">default delimiters</see>.
    /// </summary>
    /// <param name="str">The string to be tokenized.</param>
    /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c></exception>
    public StringTokenizer(string str)
        : this(str, DefaultDelimiters, false, false, String.Empty)
    {
    }

    /// <summary>
    /// Constructs a string tokenizer for the specified string using the given delimiters.
    /// </summary>
    /// <param name="str">The string to be tokenized.</param>
    /// <param name="delims">The delimiters used to tokenize the string (each <see cref="char"/> will be used as a delimiter).
    /// When <c>null</c>, <see cref="DefaultDelimiters"/> is used.</param>
    /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c></exception>
    public StringTokenizer(string str, string delims)
        : this(str, delims, false, false, String.Empty)
    {
    }

    /// <summary>
    /// Constructs a string tokenizer for the specified string using the given delimiters.
    /// </summary>
    /// <param name="str">The string to be tokenized.</param>
    /// <param name="delims">The delimiters used to tokenize the string.
    /// When <c>null</c>, <see cref="DefaultDelimiters"/> is used.</param>
    /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c></exception>
    public StringTokenizer(string str, params char[] delims)
        : this(str, delims != null ? new string(delims) : null, false, false, String.Empty)
    {
    }

    /// <summary>
    /// Constructs a string tokenizer for the specified string using the given delimiters and optionally returning them as tokens.
    /// </summary>
    /// <param name="str">The string to be tokenized.</param>
    /// <param name="delims">The delimiters used to tokenize the string (each <see cref="char"/> will be used as a delimiter).
    /// When <c>null</c>, <see cref="DefaultDelimiters"/> is used.</param>
    /// <param name="returnDelims">If set to <c>true</c> the encountered delimiters will also be returned as tokens.</param>
    /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c></exception>
    public StringTokenizer(string str, string delims, bool returnDelims)
        : this(str, delims, returnDelims, false, String.Empty)
    {
    }

    /// <summary>
    /// Constructs a string tokenizer for the specified string using the given delimiters,
    /// optionally returning them as tokens. Also empty tokens may be returned using the <see cref="String.Empty"/> string.
    /// </summary>
    /// <param name="str">The string to be tokenized.</param>
    /// <param name="delims">The delimiters used to tokenize the string (each <see cref="char"/> will be used as a delimiter).
    /// When <c>null</c>, <see cref="DefaultDelimiters"/> is used.</param>
    /// <param name="returnDelims">If set to <c>true</c> the encountered delimiters will also be returned as tokens.</param>
    /// <param name="returnEmpty">If set to <c>true</c> empty tokens will also be returned.</param>
    /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c></exception>
    public StringTokenizer(string str, string delims, bool returnDelims, bool returnEmpty)
        : this(str, delims, returnDelims, returnEmpty, String.Empty)
    {
    }

    /// <summary>
    /// Constructs a string tokenizer for the specified string using the given delimiters,
    /// optionally returning them as tokens. Also empty tokens may be returned using the <paramref name="empty"/> string.
    /// </summary>
    /// <param name="str">The string to be tokenized.</param>
    /// <param name="delims">The delimiters used to tokenize the string (each <see cref="char"/> will be used as a delimiter).
    /// When <c>null</c>, <see cref="DefaultDelimiters"/> is used.</param>
    /// <param name="returnDelims">If set to <c>true</c> the encountered delimiters will also be returned as tokens.</param>
    /// <param name="returnEmpty">If set to <c>true</c> empty tokens will also be returned.</param>
    /// <param name="empty">The string to be returned as an empty token. It is stored as given, including <c>null</c>.</param>
    /// <exception cref="System.NullReferenceException">Thrown when the passed string is <c>null</c></exception>
    public StringTokenizer(string str, string delims, bool returnDelims, bool returnEmpty, string empty)
    {
        if (delims != null)
        {
            this.delims = delims;
        }
        this.empty = empty;
        this.tokens = Tokenize(str, this.delims.ToCharArray(), returnDelims, returnEmpty, empty);
    }
    #endregion

    #region [The big tokenization method]
    /// <summary>
    /// Splits <paramref name="str"/> into the final token array according to the requested mode.
    /// </summary>
    /// <param name="str">The string to split; dereferenced without a null guard.</param>
    /// <param name="delimChars">The delimiter characters. An empty array means "split on white-space",
    /// which is what <c>String.Split</c> does for an empty separator list.</param>
    /// <param name="returnDelims">Whether each delimiter is emitted as its own one-character token.</param>
    /// <param name="returnEmpty">Whether empty tokens are emitted (as <paramref name="empty"/>).</param>
    /// <param name="empty">The placeholder used for empty tokens.</param>
    /// <returns>The extracted tokens in source order.</returns>
    private static string[] Tokenize(string str, char[] delimChars, bool returnDelims, bool returnEmpty, string empty)
    {
        if (returnDelims)
        {
            return SplitKeepingDelimiters(str, delimChars, returnEmpty, empty);
        }

        if (!returnEmpty)
        {
            return str.Split(delimChars, StringSplitOptions.RemoveEmptyEntries);
        }

        string[] parts = str.Split(delimChars, StringSplitOptions.None);
        if (empty != String.Empty)
        {
            // Substitute the caller-supplied placeholder for every empty piece.
            for (int i = 0; i < parts.Length; i++)
            {
                if (parts[i] == String.Empty)
                {
                    parts[i] = empty;
                }
            }
        }
        return parts;
    }

    /// <summary>
    /// Single-pass split that emits every delimiter as its own token.
    /// </summary>
    /// <remarks>
    /// When <paramref name="returnEmpty"/> is set, the placeholder is emitted only between two
    /// directly adjacent delimiters; nothing is emitted before a leading or after a trailing delimiter.
    /// </remarks>
    private static string[] SplitKeepingDelimiters(string str, char[] delimChars, bool returnEmpty, string empty)
    {
        List<string> result = new List<string>();
        int tokenStart = 0; // index of the first character after the previous delimiter

        for (int i = 0; i < str.Length; i++)
        {
            if (!IsDelimiter(str[i], delimChars))
            {
                continue;
            }

            if (i > tokenStart)
            {
                result.Add(str.Substring(tokenStart, i - tokenStart));
            }
            else if (returnEmpty && i > 0)
            {
                // i == tokenStart with i > 0 means the previous character was a delimiter too.
                result.Add(empty);
            }

            result.Add(new string(str[i], 1));
            tokenStart = i + 1;
        }

        if (tokenStart < str.Length)
        {
            result.Add(str.Substring(tokenStart));
        }
        return result.ToArray();
    }

    /// <summary>
    /// Tests whether <paramref name="c"/> is a delimiter, using the same rule as <c>String.Split</c>:
    /// an empty delimiter list means any white-space character.
    /// </summary>
    private static bool IsDelimiter(char c, char[] delimChars)
    {
        if (delimChars.Length == 0)
        {
            return Char.IsWhiteSpace(c);
        }
        return Array.IndexOf(delimChars, c) >= 0;
    }
    #endregion

    #region [Properties covering Java methods]
    /// <summary>
    /// Tests if there are more tokens available from this tokenizer's string.
    /// If this property returns <c>true</c>, then a subsequent
    /// use of the <see cref="NextToken"/> property will successfully return a token.
    /// </summary>
    /// <value>
    /// <c>true</c> if more tokens are available; otherwise <c>false</c>.
    /// </value>
    public bool HasMoreTokens
    {
        get { return this.index < this.tokens.Length; }
    }

    /// <summary>
    /// Gets the next token and advances the current position.
    /// </summary>
    /// <value>The next token.</value>
    /// <exception cref="System.IndexOutOfRangeException">Thrown when trying to get a token which doesn't exist.
    /// Usually caused by not checking if the <see cref="HasMoreTokens"/> property returns <c>true</c> before trying to get the next token.
    /// The current position is left unchanged in that case.</exception>
    public string NextToken
    {
        get
        {
            // Read first, advance second: a failed read must not move the cursor,
            // otherwise CountTokens would become negative.
            string token = this.tokens[this.index];
            this.index++;
            return token;
        }
    }

    /// <summary>
    /// Counts the remaining tokens - the number of times the
    /// <see cref="NextToken"/> property can be used before it throws an exception.
    /// </summary>
    /// <value>The number of remaining tokens.</value>
    /// <seealso cref="Count"/>
    public int CountTokens
    {
        get { return this.tokens.Length - this.index; }
    }
    #endregion

    #region [New methods/properties]
    /// <summary>
    /// Gets the total number of tokens extracted.
    /// </summary>
    /// <remarks>
    /// Equivalent not available in Java.
    /// This property returns the total number of extracted tokens,
    /// contrary to <see cref="CountTokens"/>, which returns only the remaining ones.
    /// </remarks>
    /// <value>The number of tokens extracted.</value>
    /// <seealso cref="CountTokens"/>
    public int Count
    {
        get { return this.tokens.Length; }
    }

    /// <summary>
    /// Gets the token with the specified index from the tokenizer without moving the current position index.
    /// </summary>
    /// <remarks>Equivalent not available in Java.</remarks>
    /// <param name="index">The index of the token to get.</param>
    /// <value>The token with the given index</value>
    /// <exception cref="System.IndexOutOfRangeException">Thrown when trying to get a token which doesn't exist, that is when
    /// <paramref name="index"/> is equal to or greater than <see cref="Count"/> or <paramref name="index"/> is negative.</exception>
    public string this[int index]
    {
        get { return this.tokens[index]; }
    }

    /// <summary>
    /// Resets the current position index so that the tokens can be extracted again.
    /// </summary>
    /// <remarks>Equivalent not available in Java.</remarks>
    public void Reset()
    {
        this.index = 0;
    }

    /// <summary>
    /// Gets the currently set string for empty tokens.
    /// </summary>
    /// <remarks>Default is <c>System.String.Empty</c></remarks>
    /// <value>The empty token string.</value>
    public string EmptyString
    {
        get { return this.empty; }
    }
    #endregion

    #region [Java-compliant methods]
    // Disabled in the source; the properties above provide the same functionality.
    /*
    /// <summary>
    /// Tests if there are more tokens available from this tokenizer's string.
    /// If this method returns <c>true</c>, then a subsequent call to <see cref="M:nextToken"/> will successfully return a token.
    /// </summary>
    /// <returns>
    /// <c>true</c> if and only if there is at least one token in the string after the current position; otherwise <c>false</c>.
    /// </returns>
    /// <seealso cref="M:nextToken"/>
    public bool hasMoreTokens()
    {
    return HasMoreTokens;
    }
    /// <summary>
    /// Returns the next token from this string tokenizer.
    /// </summary>
    /// <returns>The next token from this string tokenizer.</returns>
    public string nextToken()
    {
    return NextToken;
    }
    /// <summary>
    /// Calculates the number of times that this tokenizer's <see cref="M:nextToken"/> method can be called before it generates an exception. The current position is not advanced.
    /// </summary>
    /// <returns>The number of tokens remaining in the string using the current delimiter set.</returns>
    public int countTokens()
    {
    return CountTokens;
    }
    */
    #endregion

    #region [IEnumerable implementation]
    /// <summary>
    /// Returns an enumerator that iterates through the remaining tokens.
    /// </summary>
    /// <remarks>
    /// The enumerator advances the same position that <see cref="NextToken"/> uses, so enumerating
    /// consumes the tokenizer; call <see cref="Reset"/> before enumerating again.
    /// </remarks>
    /// <returns>
    /// A <see cref="System.Collections.Generic.IEnumerator{T}"/> that can be used to iterate through the collection.
    /// </returns>
    public IEnumerator<string> GetEnumerator()
    {
        while (this.HasMoreTokens)
        {
            yield return this.NextToken;
        }
    }

    /// <summary>
    /// Non-generic counterpart of <see cref="GetEnumerator"/>.
    /// </summary>
    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
    #endregion
}
}