hive SQL named_struct COLLECT_LIST 和排序

将数组类型的字符串按照指定的key的值进行排序并输出

select
      user_id,
      recommend_id content_id,
      a.title,
      viewpoint,
      content_type,
      audit_status,
      sort_json_array_and_concat(to_json(COLLECT_LIST(named_struct('pos', pos, 'item', b.title))),'pos','item')  AS theme_infos,
      create_time   
    from
      (
        SELECT
          user_id,
          recommend_id,
          title,
          viewpoint,
          content_type,
          audit_status,
          nvl(item.topicId, '') topicId,
          create_time ,
          pos
        FROM
          base1.table2
          LATERAL VIEW posexplode_outer(
            from_json(
              COALESCE(theme_ids, '[]'),
              'array<struct<topicId:string>>'
            )
          ) AS pos, item
        WHERE
          day = '${day}'
          and to_date(create_time) >= '2025-01-20'
         -- and audit_status in (1, 2, 10)
        GROUP BY
          user_id,
          recommend_id,
          title,
          viewpoint,
          content_type,
          audit_status,
          topicId,
          create_time,
          pos
      ) a
      left outer join (
        select
          subject_id,
          replace(title, '#', '') title
        from
          base1.table1
        where
          day = '${day}'
      ) b on a.topicId = b.subject_id
    group by
      user_id,
      recommend_id,
      a.title,
      viewpoint,
      content_type,
      audit_status,
      create_time

排序UDF函数

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

@Description(name = "sort_json_array_and_concat",
        value = "Sort a JSON array by a given key and concatenate another key's values with commas",
        extended = "SELECT sort_json_array_and_concat('[{\"id\": 2, \"name\": \"Bob\"}, {\"id\": 1, \"name\": \"Alice\"}]', 'id', 'name');")
public class SortJsonArrayAndConcatUDF extends UDF {


    public String evaluate(String jsonArrayStr, String key1, String key2) {
        if (jsonArrayStr == null || key1 == null || key2 == null) {
            return null;
        }
        try {
            // 解析输入的 JSON 字符串为 JSONArray
            JSONArray jsonArray = new JSONArray(jsonArrayStr.toString());
            List<JSONObject> jsonObjects = new ArrayList<>();
            for (int i = 0; i < jsonArray.length(); i++) {
                jsonObjects.add(jsonArray.getJSONObject(i));
            }

            // 按照 key1 的值进行排序
            Collections.sort(jsonObjects, new Comparator<JSONObject>() {
                @Override
                public int compare(JSONObject o1, JSONObject o2) {
                    Object value1 = null;
                    try {
                        value1 = o1.get(key1.toString());
                    } catch (JSONException e) {
                        e.printStackTrace();
                    }
                    Object value2 = null;
                    try {
                        value2 = o2.get(key1.toString());
                    } catch (JSONException e) {
                        e.printStackTrace();
                    }
                    if (value1 instanceof Comparable && value2 instanceof Comparable) {
                        return ((Comparable) value1).compareTo(value2);
                    }
                    return 0;
                }
            });

            // 提取 key2 的值并用逗号拼接
            StringBuilder result = new StringBuilder();
            for (int i = 0; i < jsonObjects.size(); i++) {
                JSONObject obj = jsonObjects.get(i);
                if (i > 0) {
                    result.append(",");
                }
                result.append(obj.get(key2.toString()));
            }

            return result.toString();
        } catch (Exception e) {
            return null;
        }
    }
}

一点一滴

hive SQL named_struct COLLECT_LIST 和排序

没有评论