// Original source: /home/fhqplzj/.m2/repository/org/apache/spark/spark-mllib_2.11/2.0.0/spark-mllib_2.11-2.0.0-sources.jar!/org/apache/spark/mllib/util/NumericParser.scala
// A function from Spark, rewritten in Java:
package clustering.garbage;
import com.google.common.collect.Lists;
import com.google.common.primitives.Doubles;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
/**
 * Parses a textual representation of numeric data into plain Java values.
 *
 * <p>Three shapes are recognized:
 * <ul>
 *   <li>a bare number, e.g. {@code 1.5}, parsed to a {@code Double};</li>
 *   <li>a bracketed array, e.g. {@code [1,2,3]}, parsed to a {@code double[]};</li>
 *   <li>a parenthesized tuple, e.g. {@code (1,[2,3],(4,5))}, parsed to a {@code List<Object>}
 *       whose items are themselves any of these three shapes.</li>
 * </ul>
 *
 * <p>Created by fhqplzj on 16-12-10 at 9:17 AM.
 */
public class NumericParser {
    /**
     * Parses a single token as a double.
     *
     * @param s the token text
     * @return the parsed value
     * @throws IllegalArgumentException if {@code s} is not a valid double; the original
     *         {@link NumberFormatException} is kept as the cause
     */
    private double parseDouble(String s) {
        try {
            return Double.parseDouble(s);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException(String.format("Cannot parse a double from: %s", s), e);
        }
    }

    /**
     * Parses the remainder of an array; the opening {@code "["} has already been consumed.
     * Consumes tokens up to and including the matching {@code "]"}.
     *
     * @param tokenizer the token source, positioned just after {@code "["}
     * @return the numbers found, in order
     * @throws IllegalArgumentException if a comma does not directly follow a value, if an
     *         element is not a number, or if the input ends before {@code "]"}
     */
    private double[] parseArray(StringTokenizer tokenizer) {
        List<Double> values = new ArrayList<>();
        boolean parsing = true;
        // A comma is only legal directly after a value.
        boolean allowComma = false;
        while (parsing && tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            switch (token) {
                case "]":
                    parsing = false;
                    break;
                case ",":
                    if (!allowComma) {
                        throw new IllegalArgumentException("Found a ',' at a wrong position.");
                    }
                    allowComma = false;
                    break;
                default:
                    values.add(parseDouble(token));
                    allowComma = true;
                    break;
            }
        }
        if (parsing) {
            // Ran out of tokens without ever seeing the closing bracket.
            throw new IllegalArgumentException("An array must end with ']'.");
        }
        double[] result = new double[values.size()];
        for (int i = 0; i < result.length; i++) {
            result[i] = values.get(i);
        }
        return result;
    }

    /**
     * Parses the remainder of a tuple; the opening {@code "("} has already been consumed.
     * Consumes tokens up to and including the matching {@code ")"}. Nested tuples and arrays
     * are parsed recursively. Whitespace-only tokens, which appear when delimiters are
     * separated by blanks (as in {@code "(1, [2,3])"}), are skipped.
     *
     * @param tokenizer the token source, positioned just after {@code "("}
     * @return the items in order; each is a {@code Double}, a {@code double[]}, or a nested
     *         {@code List<Object>}
     * @throws IllegalArgumentException if a comma does not directly follow an item, if an
     *         item is not parseable, or if the input ends before {@code ")"}
     */
    private List<Object> parseTuple(StringTokenizer tokenizer) {
        List<Object> items = new ArrayList<>();
        boolean parsing = true;
        // A comma is only legal directly after an item.
        boolean allowComma = false;
        while (parsing && tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            switch (token) {
                case "(":
                    items.add(parseTuple(tokenizer));
                    allowComma = true;
                    break;
                case "[":
                    items.add(parseArray(tokenizer));
                    allowComma = true;
                    break;
                case ",":
                    if (!allowComma) {
                        throw new IllegalArgumentException("Found a ',' at a wrong position.");
                    }
                    allowComma = false;
                    break;
                case ")":
                    parsing = false;
                    break;
                default:
                    // The tokenizer hands back the text between delimiters verbatim, so blanks
                    // between two delimiters (e.g. ", [") arrive as a whitespace-only token.
                    // Skip those instead of trying to parse them as a number.
                    if (!token.trim().isEmpty()) {
                        items.add(parseDouble(token));
                        allowComma = true;
                    }
                    break;
            }
        }
        if (parsing) {
            // Ran out of tokens without ever seeing the closing parenthesis.
            throw new IllegalArgumentException("A tuple must end with ')'.");
        }
        return items;
    }

    /**
     * Parses a string into a number, an array of numbers, or a (possibly nested) tuple.
     *
     * <p>The first token decides the shape: {@code "("} starts a tuple, {@code "["} starts an
     * array, and anything else is parsed as a single number. Tokens that follow the first
     * complete value are not examined.
     *
     * @param s the text to parse
     * @return a {@code Double}, a {@code double[]}, or a {@code List<Object>}
     * @throws IllegalArgumentException if {@code s} contains no token or is malformed
     */
    public Object parse(String s) {
        // The delimiters are returned as tokens too, so the parser can see the structure.
        StringTokenizer tokenizer = new StringTokenizer(s, "()[],", true);
        if (!tokenizer.hasMoreTokens()) {
            throw new IllegalArgumentException("Cannot find any token from the input string.");
        }
        String token = tokenizer.nextToken();
        switch (token) {
            case "(":
                return parseTuple(tokenizer);
            case "[":
                return parseArray(tokenizer);
            default:
                return parseDouble(token);
        }
    }

    /**
     * Small demo: parses a nested tuple and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        NumericParser numericParser = new NumericParser();
        System.out.println(numericParser.parse("((1,2),(3,4))"));
    }
}