import org.apache.hadoop.hive.ql.exec.UDF;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Hive UDF that counts the Chinese (Han-script) characters contained in a string.
 *
 * <p>Counting is done per Unicode code point, so an ideograph from a supplementary
 * plane (stored as a surrogate pair in a Java {@code String}) is counted once.
 */
public class CountChineseCharactersUDF extends UDF {

    /**
     * Matches a single code point whose Unicode script is Han.
     *
     * <p>A script-based match is used instead of the fixed range {@code U+4E00..U+9FA5}
     * because that range omits later additions to the CJK Unified Ideographs block as
     * well as the extension blocks, which would silently be left out of the count.
     */
    private static final Pattern CHINESE_CHAR_PATTERN = Pattern.compile("\\p{IsHan}");

    /**
     * Counts the Han-script characters in the given string.
     *
     * @param str the text to inspect; may be {@code null}
     * @return the number of Han-script code points in {@code str},
     *         or {@code 0} when {@code str} is {@code null}
     */
    public int evaluate(String str) {
        if (str == null) {
            return 0;
        }
        Matcher matcher = CHINESE_CHAR_PATTERN.matcher(str);
        int count = 0;
        // Each successful find() consumes exactly one matching code point.
        while (matcher.find()) {
            count++;
        }
        return count;
    }

    /**
     * Manual smoke check: prints the count for a sample string that mixes
     * Chinese characters with punctuation and line breaks.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s = "小时候看泰剧,\"-->\n"
                + "\n";
        System.out.println(new CountChineseCharactersUDF().evaluate(s));
    }
}
// No comments.