如果UDF只需要返回String,直接继承UDF即可;如果需要返回MAP/LIST/STRUCT等复杂类型,则需要继承GenericUDF。
下面的代码示例将URL中的查询参数解析成一个MAP并返回:
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.io.Text;
/**
 * Hive generic UDF that parses the query string of a URL into a
 * {@code map<string,string>}.
 *
 * <p>The text after the single {@code ?} in the URL is split on {@code &} into
 * {@code key=value} pairs. Pairs with a blank key or a blank value are skipped.
 * Keys keep the order in which they first appear; when a key occurs more than
 * once the last value wins.
 *
 * <p>The map returned by {@link #evaluate(DeferredObject[])} is a single instance
 * that is cleared and refilled on every call, so a caller that needs to keep a
 * result beyond the next call must copy it.
 */
public class UrlParamsToMap extends GenericUDF {

    /** Result buffer reused across rows; cleared at the start of every evaluate call. */
    private final Map<Text, Text> sortMap = new LinkedHashMap<Text, Text>();

    /** Inspector used to read the single string argument; set in initialize. */
    private StringObjectInspector urlOI;

    /**
     * Validates the argument list and declares the return type.
     *
     * @param arguments inspectors for the call arguments; exactly one string inspector is expected
     * @return a standard map inspector with writable string keys and values
     * @throws UDFArgumentException if the argument count is not 1 or the argument is not a string
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 1) {
            throw new UDFArgumentException("UrlParamsToMap param must be 1 argu.");
        }
        // Fail with a descriptive argument error instead of a raw ClassCastException.
        if (!(arguments[0] instanceof StringObjectInspector)) {
            throw new UDFArgumentException("UrlParamsToMap param must be a string.");
        }
        urlOI = (StringObjectInspector) arguments[0];
        return ObjectInspectorFactory.getStandardMapObjectInspector(
                PrimitiveObjectInspectorFactory.writableStringObjectInspector,
                PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    }

    /**
     * Parses the URL held by the first argument.
     *
     * @param deferredObjects the call arguments; only the first one is read
     * @return the reused result map, empty when the input is NULL or has no usable parameters
     * @throws HiveException if the deferred argument cannot be obtained
     */
    @Override
    public Object evaluate(DeferredObject[] deferredObjects) throws HiveException {
        // The buffer is shared between calls: without this, parameters parsed
        // from earlier rows would leak into the result of the current row.
        sortMap.clear();

        Object urlObj = deferredObjects[0].get();
        if (urlObj == null) {
            return sortMap;
        }
        Text url = urlOI.getPrimitiveWritableObject(urlObj);
        if (url == null) {
            return sortMap;
        }
        getParamsMap(url.toString(), sortMap);
        return sortMap;
    }

    /**
     * Extracts the query parameters of {@code url} into {@code sortMap}.
     *
     * @param url     the URL to parse; must contain exactly one {@code ?} followed by the parameters
     * @param sortMap the map that receives the parsed entries; existing entries are kept
     * @return {@code sortMap} once the parameter list has been scanned, or a new empty map
     *         (leaving {@code sortMap} untouched) when the URL is blank or has no parameter list
     */
    public Map<Text, Text> getParamsMap(String url, Map<Text, Text> sortMap) {
        Map<Text, Text> defaultMap = new LinkedHashMap<Text, Text>();
        if (StringUtils.isBlank(url)) {
            return defaultMap;
        }
        String[] urlSplits = url.split("\\?");
        if (urlSplits.length != 2) {
            return defaultMap;
        }
        String urlParamStr = urlSplits[1];
        if (StringUtils.isBlank(urlParamStr)) {
            return defaultMap;
        }
        String[] paramSplits = urlParamStr.split("&");
        if (paramSplits.length == 0) {
            return defaultMap;
        }
        for (String kvStr : paramSplits) {
            if (StringUtils.isBlank(kvStr)) {
                continue;
            }
            // Split on the first '=' only, so values that themselves contain '='
            // (e.g. base64 padding) are kept whole instead of dropped or truncated.
            String[] kvs = kvStr.split("=", 2);
            if (kvs.length == 2
                    && StringUtils.isNotBlank(kvs[0])
                    && StringUtils.isNotBlank(kvs[1])) {
                sortMap.put(new Text(kvs[0]), new Text(kvs[1]));
            }
        }
        return sortMap;
    }

    /**
     * Builds the display text for this call.
     *
     * @param strings display strings of the call arguments
     * @return {@code "map(<first argument>)"}
     */
    @Override
    public String getDisplayString(String[] strings) {
        return "map(" + strings[0] + ")";
    }
}
转载请注明来自:疯狂的蚂蚁www.crazyant.net