import org.apache.hadoop.hive.ql.exec.UDF;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Hive UDF that counts the Chinese (Han-script) characters in a string.
 *
 * <p>Characters are recognised by their Unicode script instead of a fixed code-point range, so
 * ideographs outside U+4E00..U+9FA5 (later additions to the basic block, Extension A, and the
 * supplementary-plane extensions) are counted as well.
 */
public class CountChineseCharactersUDF extends UDF {
    /**
     * Matches a single code point whose Unicode script is Han.
     *
     * <p>Compiled once and shared; {@link Pattern} instances are immutable and safe to reuse
     * across calls.
     */
    private static final Pattern CHINESE_CHAR_PATTERN = Pattern.compile("\\p{IsHan}");

    /**
     * Counts the Han-script characters contained in {@code str}.
     *
     * @param str the text to inspect; may be {@code null}
     * @return the number of Han-script code points in {@code str}, or {@code 0} when
     *         {@code str} is {@code null} or contains none
     */
    public int evaluate(String str) {
        if (str == null) {
            return 0;
        }

        Matcher matcher = CHINESE_CHAR_PATTERN.matcher(str);
        int count = 0;

        // The regex engine matches by code point, so an ideograph encoded as a surrogate
        // pair is consumed by a single find() and counted once.
        while (matcher.find()) {
            count++;
        }
        return count;
    }

    /**
     * Ad-hoc smoke check: prints the count for a sample string that mixes Han characters
     * with ASCII punctuation and line breaks.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s="小时候看泰剧,\"-->\n" +
                "\n";
        System.out.println(new CountChineseCharactersUDF().evaluate(s));
    }
}