多参表达式Hive UDF

440 次阅读 预计阅读时间: 6 分钟


使用说明

支持的操作符

  1. ""(空字符串):跳过,即无条件筛选
  2. =:等于
  3. !=:不等于
  4. range:区间内,range[n,m]表示 between n and m
  5. nrange:区间外,nrange[n,m]表示 not between n and m
  6. in:集合内,in(n,m,j,k)表示 in ('n','m','j','k')
  7. nin:集合外,即not in
  8. regex:正则表达式,regex(^a.*)表示以字母a开头

输入与输出

  1. 输入:参数个数成双。每一对参数中,参数1为任意类型的筛选列,参数2为字符串类型的条件列。
  2. 输出:true or false

demo

-- Demo query: each column calls multi_exp with one or more (value, condition) pairs.
-- A call yields true only when every pair in it is satisfied.
select
    -- '=' with nothing after it: the value must equal the empty string (true here)
    multi_exp('', '=') as t1,
    -- an empty condition string means the pair is skipped, so it never fails (true here)
    multi_exp('abc', '') as t2,
    -- a numeric value is converted to its string form before comparing (true here)
    multi_exp(123, '=123') as t3,
    -- string values: t4, t5, t6, t8 and t10 are satisfied; t7 (nrange) and t9 (nin) are not
    multi_exp('abc', '=abc') as t4,
    multi_exp('abc', '!=aaa') as t5,
    multi_exp('23', 'range[1,100]') as t6,
    multi_exp('23', 'nrange[1,100]') as t7,
    multi_exp('abc', 'in(abc,aaa,bbb,ccc)') as t8,
    multi_exp('abc', 'nin(abc,aaa,bbb,ccc)') as t9,
    multi_exp('abc', 'regex(^a.+)') as t10,
    -- several pairs in one call are combined with AND (all five hold, so true)
    multi_exp(
                'abc', '=abc'
                , 'abc', '!=aaa'
                , '23', 'range[1,100]'
                , 'abc', 'in(abc,aaa,bbb,ccc)'
                , 'abc', 'regex(^a.+)'
        ) t11
    -- map value: the condition is a JSON object mapping each key to its own condition
    ,
    multi_exp(str_to_map('k1:v1,k2:v2,k3:v3', ',', ':'), '{"k1":"=v1","k2":"=v2"}') as t12, -- k1=v1 and k2=v2, true
    multi_exp(str_to_map('k1:v1,k2:v2,k3:v3', ',', ':'), '{"k1":"=v1","k2":"=v3"}') as t13  -- k1=v1 but k2 is not v3, false
    -- JSON-formatted string value, checked key by key like a map
    ,
    multi_exp('{"k1":"v1", "k2":"v2", "k3":"v3"}', '{"k1":"=v1","k2":"=v2"}') as t14,  -- k1=v1 and k2=v2, true
    multi_exp('{"k1":"v1", "k2":"v2", "k3":"v3"}', '{"k1":"=v1","k3":"=v2"}') as t15  -- k1=v1 but k3 is not v2, false
;

源码

import com.alibaba.fastjson.JSONObject;
import org.apache.hadoop.hive.ql.exec.UDF;

import java.util.*;
import java.util.regex.Pattern;

/**
 * Hive UDF that evaluates any number of (value, condition) pairs and returns {@code true}
 * only when every pair is satisfied.
 *
 * <p>A condition is a string that starts with one of these operators:
 * <ul>
 *   <li>empty string - the pair is skipped;</li>
 *   <li>{@code =x} / {@code !=x} - the value's string form equals / differs from {@code x};</li>
 *   <li>{@code range[n,m]} / {@code nrange[n,m]} - the value lies inside / outside the closed
 *       interval; numbers are compared numerically, everything else as strings;</li>
 *   <li>{@code in(a,b,c)} / {@code nin(a,b,c)} - the value is / is not one of the
 *       comma-separated items (items are not trimmed);</li>
 *   <li>{@code regex(p)} - the pattern {@code p} is found somewhere in the value.</li>
 * </ul>
 *
 * <p>When the condition is a JSON object, each of its keys maps to a condition of the form
 * above, and the value must be a {@link Map} or a JSON object string; the entry looked up
 * under each key is checked against that key's condition. A missing or null entry is treated
 * as the empty string.
 */
public class MultiExpUDF extends UDF {

    private static final String IGNORE = "";
    private static final String OPERATOR_EQ = "=";
    private static final String OPERATOR_NE = "!=";
    private static final String OPERATOR_RANGE = "range";
    private static final String OPERATOR_NRANGE = "nrange";
    private static final String OPERATOR_IN = "in";
    private static final String OPERATOR_NIN = "nin";
    private static final String OPERATOR_REGEX = "regex";

    /**
     * Checks every (value, condition) pair.
     *
     * @param args alternating values and conditions; a null value is treated as the empty
     *             string, and a condition whose string form is empty skips its pair
     * @return {@code true} when all pairs are satisfied; {@code false} when the argument
     *         count is odd, a condition is null or malformed, or any pair is not satisfied
     */
    public boolean evaluate(Object... args) {
        if (args == null || args.length % 2 != 0) {
            return false;
        }

        for (int i = 0; i < args.length; i += 2) {
            Object conditionValue = args[i + 1];
            if (conditionValue == null) {
                // A missing condition is not the same as the explicit "skip" marker.
                return false;
            }
            // Compare the string form so that non-String condition objects can also skip.
            String condition = conditionValue.toString();
            if (IGNORE.equals(condition)) {
                continue;
            }

            Object originalValue = args[i] == null ? "" : args[i];
            try {
                if (!pairSatisfied(originalValue, condition)) {
                    return false;
                }
            } catch (RuntimeException malformed) {
                // Deliberate best effort: a bad regex or bad JSON fails the check for this
                // call instead of propagating out of the UDF.
                return false;
            }
        }
        return true;
    }

    /** Dispatches one pair to the per-key check or the plain string check. */
    private boolean pairSatisfied(Object originalValue, String condition) {
        boolean keyedCondition = condition.startsWith("{") && condition.endsWith("}");

        if (originalValue instanceof Map) {
            // A map can only be checked key by key.
            return keyedCondition && mapSatisfies((Map<?, ?>) originalValue, condition);
        }

        String originalStr = originalValue.toString();
        if (!keyedCondition) {
            return satisfies(originalStr, condition);
        }

        // The value is parsed as JSON only when the condition asks for per-key checks.
        JSONObject originalJson = parseJsonObject(originalStr);
        return originalJson != null && jsonSatisfies(originalJson, condition);
    }

    /** Checks every key of the JSON condition against the matching map entry. */
    private boolean mapSatisfies(Map<?, ?> originalMap, String condition) {
        JSONObject conditionJson = JSONObject.parseObject(condition);
        for (String key : conditionJson.keySet()) {
            Object field = originalMap.get(key);
            // Read by string form so that non-String map values are supported too.
            String original = field == null ? "" : field.toString();
            if (!satisfies(original, conditionJson.getString(key))) {
                return false;
            }
        }
        return true;
    }

    /** Checks every key of the JSON condition against the matching JSON field. */
    private boolean jsonSatisfies(JSONObject originalJson, String condition) {
        JSONObject conditionJson = JSONObject.parseObject(condition);
        for (String key : conditionJson.keySet()) {
            String field = originalJson.getString(key);
            String original = field == null ? "" : field;
            if (!satisfies(original, conditionJson.getString(key))) {
                return false;
            }
        }
        return true;
    }

    /** Parses {@code text} as a JSON object, returning null when it is not one. */
    private static JSONObject parseJsonObject(String text) {
        try {
            return JSONObject.parseObject(text);
        } catch (RuntimeException ignored) {
            // Not a JSON object; the caller treats this as "cannot be checked per key".
            return null;
        }
    }

    /**
     * Evaluates a single operator condition against a string value.
     *
     * @return {@code true} when the condition holds; {@code false} when it does not, when
     *         the condition is null, or when it starts with no known operator
     */
    private boolean satisfies(String original, String condition) {
        if (condition == null) {
            return false;
        }
        if (condition.startsWith(OPERATOR_EQ)) {
            return original.equals(condition.substring(OPERATOR_EQ.length()));
        }
        if (condition.startsWith(OPERATOR_NE)) {
            return !original.equals(condition.substring(OPERATOR_NE.length()));
        }
        if (condition.startsWith(OPERATOR_RANGE)) {
            return rangeSatisfied(original, operand(condition, OPERATOR_RANGE), true);
        }
        if (condition.startsWith(OPERATOR_NRANGE)) {
            return rangeSatisfied(original, operand(condition, OPERATOR_NRANGE), false);
        }
        if (condition.startsWith(OPERATOR_IN)) {
            return membershipSatisfied(original, operand(condition, OPERATOR_IN), true);
        }
        if (condition.startsWith(OPERATOR_NIN)) {
            return membershipSatisfied(original, operand(condition, OPERATOR_NIN), false);
        }
        if (condition.startsWith(OPERATOR_REGEX)) {
            String patternStr = operand(condition, OPERATOR_REGEX);
            return patternStr != null && Pattern.compile(patternStr).matcher(original).find();
        }
        return false;
    }

    /**
     * Returns the text between the bracket that follows {@code operator} and the final
     * character of {@code condition}, or null when the condition is too short to hold both.
     * The bracket characters themselves are not validated.
     */
    private static String operand(String condition, String operator) {
        int start = operator.length() + 1;
        int end = condition.length() - 1;
        return start <= end ? condition.substring(start, end) : null;
    }

    /**
     * Checks a closed-interval condition.
     *
     * @param body         the {@code "lower,upper"} text, or null when malformed
     * @param expectInside {@code true} for {@code range}, {@code false} for {@code nrange}
     * @return whether the value's position matches {@code expectInside}; always
     *         {@code false} when the bounds are malformed or numbers and text are mixed
     */
    private static boolean rangeSatisfied(String original, String body, boolean expectInside) {
        if (body == null) {
            return false;
        }
        String[] bounds = body.split(",");
        if (bounds.length != 2) {
            return false;
        }

        Comparable<?> value = getComparable(original);
        Comparable<?> lower = getComparable(bounds[0]);
        Comparable<?> upper = getComparable(bounds[1]);

        int versusLower;
        int versusUpper;
        if (value instanceof Number && lower instanceof Number && upper instanceof Number) {
            // Compare as doubles so that an integer and a decimal operand are comparable.
            double number = ((Number) value).doubleValue();
            versusLower = Double.compare(number, ((Number) lower).doubleValue());
            versusUpper = Double.compare(number, ((Number) upper).doubleValue());
        } else if (value instanceof String && lower instanceof String && upper instanceof String) {
            String text = (String) value;
            versusLower = text.compareTo((String) lower);
            versusUpper = text.compareTo((String) upper);
        } else {
            return false;
        }

        boolean inside = versusLower >= 0 && versusUpper <= 0;
        return inside == expectInside;
    }

    /**
     * Checks a set-membership condition.
     *
     * @param body         the comma-separated items, or null when malformed
     * @param expectMember {@code true} for {@code in}, {@code false} for {@code nin}
     */
    private static boolean membershipSatisfied(String original, String body, boolean expectMember) {
        if (body == null) {
            return false;
        }
        List<String> values = Arrays.asList(body.split(","));
        return values.contains(original) == expectMember;
    }

    /** Converts text to an Integer or a Double when it parses as one, else returns it as is. */
    private static Comparable<?> getComparable(String value) {
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException notAnInteger) {
            try {
                return Double.parseDouble(value);
            } catch (NumberFormatException notANumber) {
                return value;
            }
        }
    }

    /** Manual smoke test that prints the result of a few representative calls. */
    public static void main(String[] args) {
        MultiExpUDF udf = new MultiExpUDF();

        // Plain string value with a null condition
        boolean result1 = udf.evaluate("1", null);
        System.out.println("非 map 或 json 字符串测试结果: " + result1);

        // Regular expression
        boolean result2 = udf.evaluate("abc", "=abc", "aabcdef", "regex(cd.*)");
        System.out.println("含正则输入测试结果: " + result2);

        // Map value
        Map<String, String> map = new HashMap<>();
        map.put("key1", "abc");
        map.put("key2", "efg");
        map.put("key3", "ab");
        boolean result3 = udf.evaluate(map, "{\"key1\":\"=abc\",\"key2\":\"!=bcd\",\"key3\":\"in(ab,b)\"}");
        System.out.println("map 类型测试结果: " + result3);

        // JSON string value
        String jsonStr = "{\"key1\":\"abc\",\"key2\":\"efg\",\"key3\":\"5\"}";
        boolean result4 = udf.evaluate(jsonStr, "{\"key1\":\"=abc\",\"key2\":\"!=bcd\",\"key3\":\"range[5,10]\"}");
        System.out.println("json 格式字符串测试结果: " + result4);

        // Map entry with a null value
        Map<String, String> map2 = new HashMap<>();
        map2.put("key1", "abc");
        map2.put("key2", null);
        boolean result6 = udf.evaluate(map2, "{\"key1\":\"=abc\",\"key2\":\"=\"}");
        System.out.println("测试 map 中 key 为空的情况: " + result6);

        // JSON value where the key named by the condition is absent
        String jsonStr2 = "{\"key1\":\"abc\",\"key2\":null}";
        boolean result7 = udf.evaluate(jsonStr2, "{\"key1\":\"=abc\",\"key3\":\"=\"}");
        System.out.println("测试 json 中 key 为空的情况: " + result7);

        // not in
        boolean result8 = udf.evaluate("abc1", "nin(abc,def,ghi)");
        System.out.println("测试 not in 功能结果: " + result8);

        // not between
        boolean result9 = udf.evaluate("3", "nrange[4,6]");
        System.out.println("测试 not between 功能结果: " + result9);
    }
}