/**
 * Splits the given string into tokens with a {@link StringTokenizer}.
 *
 * <p>Every character in {@code delimiters} is handed to the tokenizer as a
 * delimiter character. Each token may optionally be trimmed, and tokens that
 * are empty (after the optional trimming) may optionally be dropped.
 *
 * @param str the string to split; {@code null} yields {@code EMPTY_STRING_ARRAY}
 * @param delimiters the delimiter characters passed to the tokenizer
 * @param trimTokens whether to call {@link String#trim()} on every token
 * @param ignoreEmptyTokens whether zero-length tokens are left out of the result
 * @return the collected tokens, converted via {@code toStringArray}
 */
public static String[] tokenizeToStringArray(
        @Nullable String str, String delimiters, boolean trimTokens, boolean ignoreEmptyTokens) {

    if (str == null) {
        return EMPTY_STRING_ARRAY;
    }

    List<String> collected = new ArrayList<>();
    StringTokenizer tokenizer = new StringTokenizer(str, delimiters);
    while (tokenizer.hasMoreTokens()) {
        String raw = tokenizer.nextToken();
        String candidate = trimTokens ? raw.trim() : raw;
        if (ignoreEmptyTokens && candidate.isEmpty()) {
            // Empty token and the caller asked for those to be skipped.
            continue;
        }
        collected.add(candidate);
    }
    return toStringArray(collected);
}
/**
 * Parses a whitespace-separated cron expression and stores the values of each
 * field through {@code storeExpressionVals}.
 *
 * <p>Fields are numbered from 0 in the order they appear in the expression.
 * Field 3 is read back as the day-of-month set and field 5 as the day-of-week
 * set; field 6 is optional and defaults to {@code "*"} when it is absent
 * (presumably the year field, judging by the order of the field sets; confirm
 * against {@code getSet}).
 *
 * @param expression the cron expression text
 * @throws ParseException if the expression has fewer than six fields, uses an
 *         unsupported combination, or any other exception occurs while parsing
 *         (that exception is attached as the cause)
 */
protected void buildExpression(String expression) throws ParseException {
    this.expressionParsed = true;
    try {
        // Lazily create the per-field value sets.
        if (this.seconds == null) {
            this.seconds = new TreeSet<>();
        }
        if (this.minutes == null) {
            this.minutes = new TreeSet<>();
        }
        if (this.hours == null) {
            this.hours = new TreeSet<>();
        }
        if (this.daysOfMonth == null) {
            this.daysOfMonth = new TreeSet<>();
        }
        if (this.months == null) {
            this.months = new TreeSet<>();
        }
        if (this.daysOfWeek == null) {
            this.daysOfWeek = new TreeSet<>();
        }
        if (this.years == null) {
            this.years = new TreeSet<>();
        }

        int exprOn = 0;
        StringTokenizer exprsTok = new StringTokenizer(expression, " \t", false);
        while (exprsTok.hasMoreTokens() && exprOn <= 6) {
            String expr = exprsTok.nextToken().trim();

            boolean containsL = expr.indexOf('L') != -1;
            boolean isCommaList = expr.length() > 1 && expr.contains(",");

            if (exprOn == 3 && containsL && isCommaList) {
                throw new ParseException("Support for specifying 'L' and 'LW' with other days of the month is not implemented", -1);
            }
            if (exprOn == 5 && containsL && isCommaList) {
                throw new ParseException("Support for specifying 'L' with other days of the week is not implemented", -1);
            }
            if (exprOn == 5) {
                // Reject a second '#' in the day-of-week field.
                int firstHash = expr.indexOf('#');
                if (firstHash != -1 && expr.indexOf('#', firstHash + 1) != -1) {
                    throw new ParseException("Support for specifying multiple \"nth\" days is not implemented.", -1);
                }
            }

            // A field may be a comma-separated list; store each entry separately.
            StringTokenizer vTok = new StringTokenizer(expr, ",");
            while (vTok.hasMoreTokens()) {
                String v = vTok.nextToken();
                this.storeExpressionVals(0, v, exprOn);
            }
            ++exprOn;
        }

        if (exprOn <= 5) {
            throw new ParseException("Unexpected end of expression.", expression.length());
        }
        if (exprOn <= 6) {
            // Optional seventh field missing: treat it as a wildcard.
            this.storeExpressionVals(0, "*", 6);
        }

        TreeSet<Integer> dow = this.getSet(5);
        TreeSet<Integer> dom = this.getSet(3);
        boolean dayOfMSpec = !dom.contains(NO_SPEC);
        boolean dayOfWSpec = !dow.contains(NO_SPEC);

        // Exactly one of day-of-month / day-of-week must be specified; the
        // expression is rejected when both are specified or both are left out.
        if (dayOfMSpec == dayOfWSpec) {
            throw new ParseException("Support for specifying both a day-of-week AND a day-of-month parameter is not implemented.", 0);
        }
    } catch (ParseException e) {
        throw e;
    } catch (Exception e) {
        // Keep the original failure reachable through getCause() instead of
        // reducing it to its toString() text only.
        ParseException wrapped =
                new ParseException("Illegal cron expression format (" + e.toString() + ")", 0);
        wrapped.initCause(e);
        throw wrapped;
    }
}
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

/**
 * Small demo of {@link StringTokenizer}: splits a string, prints every token
 * and finally prints the list of collected tokens.
 *
 * @author xiaoxu
 * @date 2023-10-18
 * spring_boot:com.xiaoxu.boot.tokenizer.TestStringTokenizer
 */
public class TestStringTokenizer {

    public static void main(String[] args) {
        print("你 好 吗\t我是 \t你的\t 朋友 \t", " \t", false);
    }

    /**
     * Tokenizes {@code str}, printing each token as it is produced and then the
     * list of collected tokens. The token {@code "吗"} is printed but
     * deliberately left out of the collected list.
     *
     * @param str the string to split
     * @param delimiter the delimiter characters handed to the tokenizer
     * @param isReturnDelims whether delimiter characters are themselves returned as tokens
     */
    public static void print(String str, String delimiter, boolean isReturnDelims) {
        System.out.println("切割字符串:【" + str + "】;" + "分隔符:【" + delimiter + "】。");

        List<String> strs = new ArrayList<>();
        // Pass the flag through; it used to be hard-coded to false.
        StringTokenizer strToken = new StringTokenizer(str, delimiter, isReturnDelims);
        while (strToken.hasMoreTokens()) {
            String s = strToken.nextToken();
            System.out.println("切割:【" + s + "】");
            if (!"吗".equals(s)) {
                strs.add(s);
            }
        }

        System.out.println("字符串数组:" + strs);
    }
}
切割字符串:【你 好 吗 我是 你的 朋友 】;分隔符:【 】。
字符串数组:[你, 好, 我是, 你的, 朋友]
/**
 * Creates a tokenizer over {@code str}.
 *
 * @param str the string to tokenize; {@code str.length()} is read here, so a
 *        {@code null} value fails with a {@link NullPointerException}
 * @param delim the delimiter characters
 * @param returnDelims stored in {@code retDelims}
 */
public StringTokenizer(String str, String delim, boolean returnDelims) {
    // Text being scanned and its upper bound.
    this.str = str;
    this.maxPosition = str.length();

    // Delimiter configuration.
    this.delimiters = delim;
    this.retDelims = returnDelims;

    // Scanning state starts at the beginning of the string.
    this.currentPosition = 0;
    this.newPosition = -1;
    this.delimsChanged = false;

    // Derive the cached delimiter data from the delimiters just stored.
    setMaxDelimCodePoint();
}
/**
 * Records the largest delimiter code point in {@code maxDelimCodePoint}.
 *
 * <p>With {@code null} delimiters the maximum is set to 0. If any delimiter
 * char lies in the surrogate range, {@code hasSurrogates} is set and the
 * delimiters are additionally stored one code point per slot in
 * {@code delimiterCodePoints}.
 */
private void setMaxDelimCodePoint() {
    if (delimiters == null) {
        maxDelimCodePoint = 0;
        return;
    }

    int highest = 0;
    int codePointCount = 0;
    int index = 0;
    while (index < delimiters.length()) {
        int current = delimiters.charAt(index);
        boolean inSurrogateRange =
                current >= Character.MIN_HIGH_SURROGATE && current <= Character.MAX_LOW_SURROGATE;
        if (inSurrogateRange) {
            // Re-read as a full code point; it may span two chars.
            current = delimiters.codePointAt(index);
            hasSurrogates = true;
        }
        highest = Math.max(highest, current);
        codePointCount++;
        index += Character.charCount(current);
    }
    maxDelimCodePoint = highest;

    if (!hasSurrogates) {
        return;
    }

    // Second pass: one array slot per code point.
    delimiterCodePoints = new int[codePointCount];
    int offset = 0;
    for (int slot = 0; slot < codePointCount; slot++) {
        int codePoint = delimiters.codePointAt(offset);
        delimiterCodePoints[slot] = codePoint;
        offset += Character.charCount(codePoint);
    }
}
调用setMaxDelimCodePoint()方法,源码可知,切割时设置int maxDelimCodePoint,是为了优化分隔符的检测(取的是分隔字符串中码点值(Unicode code point)最大的字符的码点值,存入maxDelimCodePoint中。在方法int scanToken(int startPos)中,若满足条件(c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0),意即该字符的码点值小于等于最大的maxDelimCodePoint,那么这个字符可能存在于分隔字符串中,再检测delimiters分隔字符串中是否包含该字符,反之,若码点值大于分隔字符串中最大的maxDelimCodePoint,也就是说该字符一定不存在于分隔字符串里,&&直接跳过delimiters.indexOf的检测,也就达到了优化分隔符检测的效果了)。
/**
 * Advances from {@code startPos} over non-delimiter characters and returns the
 * index at which scanning stopped (the first delimiter found, or a position
 * not below {@code maxPosition}).
 *
 * <p>When {@code retDelims} is set and no character was consumed, the
 * character at {@code startPos} is examined and, if it is a delimiter, the
 * returned index is moved past it.
 *
 * @param startPos index at which scanning begins
 * @return the index just past the scanned token
 */
private int scanToken(int startPos) {
    int cursor = startPos;

    while (cursor < maxPosition) {
        if (hasSurrogates) {
            int codePoint = str.codePointAt(cursor);
            if (codePoint <= maxDelimCodePoint && isDelimiter(codePoint)) {
                break;
            }
            cursor += Character.charCount(codePoint);
        } else {
            char ch = str.charAt(cursor);
            // Cheap upper-bound check first; indexOf only runs for candidates.
            if (ch <= maxDelimCodePoint && delimiters.indexOf(ch) >= 0) {
                break;
            }
            cursor++;
        }
    }

    // Nothing more to do unless delimiters are returned and nothing was consumed.
    if (!retDelims || cursor != startPos) {
        return cursor;
    }

    if (hasSurrogates) {
        int codePoint = str.codePointAt(cursor);
        if (codePoint <= maxDelimCodePoint && isDelimiter(codePoint)) {
            cursor += Character.charCount(codePoint);
        }
    } else {
        char ch = str.charAt(cursor);
        if (ch <= maxDelimCodePoint && delimiters.indexOf(ch) >= 0) {
            cursor++;
        }
    }
    return cursor;
}
scanToken方法用于从startPos开始向后扫描属于同一个token的字符(即跳过非分隔符字符),只要某次循环时,该字符包含在分隔字符串里,那么position不再自增,以此时的position值作为实际切割获取字符串的结束索引(不包含), 因为substring方法是左闭右开的,该值是实际获取字符串的末索引值+1,所以可以截取到完整的不包含分隔符的字符串片段。
/**
 * Advances from {@code startPos} over consecutive delimiter characters and
 * returns the index of the first non-delimiter found (or a position not below
 * {@code maxPosition}). When {@code retDelims} is set nothing is skipped and
 * {@code startPos} is returned.
 *
 * @param startPos index at which skipping begins
 * @return the index of the first character that was not skipped
 * @throws NullPointerException if {@code delimiters} is {@code null}
 */
private int skipDelimiters(int startPos) {
    if (delimiters == null) {
        throw new NullPointerException();
    }

    int cursor = startPos;
    while (true) {
        if (retDelims || cursor >= maxPosition) {
            return cursor;
        }
        if (hasSurrogates) {
            int codePoint = str.codePointAt(cursor);
            boolean delimiterHere = codePoint <= maxDelimCodePoint && isDelimiter(codePoint);
            if (!delimiterHere) {
                return cursor;
            }
            cursor += Character.charCount(codePoint);
        } else {
            char ch = str.charAt(cursor);
            // Cheap upper-bound check first; indexOf only runs for candidates.
            boolean delimiterHere = ch <= maxDelimCodePoint && delimiters.indexOf(ch) >= 0;
            if (!delimiterHere) {
                return cursor;
            }
            cursor++;
        }
    }
}
public static void print2(String str, String delimiter, boolean isReturnDelims) { StringTokenizer strTokenizer = new StringTokenizer(str, delimiter); System.out.println("总数目:" + strTokenizer.countTokens()); int count; String[] strs = new String[count = strTokenizer.countTokens()]; // 注意:不要在循环里写 int i = 0; i < strTokenizer.countTokens(); // 因为 countTokens方法需要使用currentPosition,而每次执行nextToken方法时,currentPosition会一直往下偏移计算, // 会导致循环中, i < strTokenizer.countTokens();发生改变,这里应该是常量总数目 for (int i = 0; i < count; i++) { String s = strTokenizer.nextToken(); strs[i] = s; } System.out.println(Arrays.toString(strs)); }
/**
 * Counts the tokens remaining from {@code currentPosition} onward by
 * alternately skipping delimiters and scanning tokens on a local copy of the
 * position; {@code currentPosition} itself is not modified here.
 *
 * @return the number of tokens found between the current position and
 *         {@code maxPosition}
 */
public int countTokens() {
    int cursor = currentPosition;
    int tokens = 0;
    while (true) {
        if (cursor >= maxPosition) {
            return tokens;
        }
        cursor = skipDelimiters(cursor);
        if (cursor >= maxPosition) {
            // Only delimiters were left.
            return tokens;
        }
        cursor = scanToken(cursor);
        ++tokens;
    }
}
// Example: every character of "abc" acts as a separate delimiter, so only the digits remain as tokens.
print2("1a2b3c4ca5bc6ba7abc8acbbaba9", "abc", false);
[1, 2, 3, 4, 5, 6, 7, 8, 9]