{"id":5733,"date":"2024-08-28T08:33:00","date_gmt":"2024-08-28T00:33:00","guid":{"rendered":"https:\/\/ixyzero.com\/blog\/?p=5733"},"modified":"2024-08-27T10:35:34","modified_gmt":"2024-08-27T02:35:34","slug":"hive-%e4%b8%ad-map-%e7%b1%bb%e5%9e%8b%e5%ad%97%e6%ae%b5%e7%9a%84%e5%b8%b8%e7%94%a8%e6%96%b9%e6%b3%95%e5%ad%a6%e4%b9%a0","status":"publish","type":"post","link":"https:\/\/ixyzero.com\/blog\/archives\/5733.html","title":{"rendered":"Hive \u4e2d map \u7c7b\u578b\u5b57\u6bb5\u7684\u5e38\u7528\u65b9\u6cd5\u5b66\u4e60"},"content":{"rendered":"\n<p>=Start=<\/p>\n\n\n\n<h4 class=\"wp-block-heading\">\u7f18\u7531\uff1a<\/h4>\n\n\n\n<p>Hive\u652f\u6301\u539f\u59cb\u6570\u636e\u7c7b\u578b\u548c\u590d\u6742\u7c7b\u578b\uff0c\u539f\u59cb\u7c7b\u578b\u5305\u62ec\u6570\u503c\u578b\uff0cBoolean\uff0c\u5b57\u7b26\u4e32\uff0c\u65f6\u95f4\u6233\u3002\u590d\u6742\u7c7b\u578b\u5305\u62ecarray\uff0cmap\uff0cstruct\u3002<\/p>\n\n\n\n<p>\u5b9e\u9645\u5de5\u4f5c\u4e2d\uff0c\u6709\u65f6\u4f1a\u9047\u5230 map \u8fd9\u79cd\u590d\u6742\u6570\u636e\u7c7b\u578b\uff0c\u9664\u4e86\u9700\u8981\u77e5\u9053\u5982\u4f55\u8bbf\u95ee\u5176\u5185\u5bb9\u4e4b\u5916\uff0c\u4e5f\u7b80\u5355\u6574\u7406\u4e00\u4e0b\u5b83\u7684\u5e38\u7528\u65b9\u6cd5\uff0c\u65b9\u4fbf\u540e\u9762\u6709\u9700\u8981\u7684\u65f6\u5019\u53c2\u8003\u3002<\/p>\n\n\n\n<h4 class=\"wp-block-heading\">\u6b63\u6587\uff1a<\/h4>\n\n\n\n<h5 class=\"wp-block-heading\">\u53c2\u8003\u89e3\u7b54\uff1a<\/h5>\n\n\n\n<p>map \u662f\u4e00\u7ec4\u952e\u503c\u5bf9\u7684\u7ec4\u5408\uff0c\u53ef\u4ee5\u901a\u8fc7 key \u8bbf\u95ee value\uff0c\u952e\u503c\u4e4b\u95f4\u540c\u6837\u8981\u5728\u521b\u5efa\u8868\u65f6\u6307\u5b9a\u5206\u9694\u7b26\u3002<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><code>size(Map)<\/code>\u51fd\u6570\uff1a\u53ef\u5f97map\u7684\u957f\u5ea6\u3002\u8fd4\u56de\u503c\u7c7b\u578b\uff1aint<\/li>\n\n\n\n<li>\u83b7\u53d6map\u4e2d\u67d0\u4e2akey\u5bf9\u5e94\u7684value\u5185\u5bb9\uff0cmap\u7684\u5143\u7d20\u8bbf\u95ee\u5219\u4f7f\u7528<code>[]<\/code>\uff0c\u4f8b\u5982 <code>map['key1']<\/code><\/li>\n\n\n\n<li><code>map_keys(Map)<\/code>\u51fd\u6570\uff1a\u53ef\u5f97map\u4e2d\u6240\u6709\u7684key; \u8fd4\u56de\u503c\u7c7b\u578b: array<\/li>\n\n\n\n<li><code>map_values(Map)<\/code>\u51fd\u6570\uff1a\u53ef\u5f97map\u4e2d\u6240\u6709\u7684value; \u8fd4\u56de\u503c\u7c7b\u578b: array<\/li>\n\n\n\n<li>\u5224\u65admap\u4e2d\u662f\u5426\u5305\u542b\u67d0\u4e2akey\u503c\uff1a<br>\u65b9\u6cd5\u4e00\uff1a\u5148\u901a\u8fc7 <code>map_keys()<\/code>\u51fd\u6570 \u5f97\u5230\u4e00\u4e2a\u5305\u542b\u6240\u6709key\u7684\u6570\u7ec4\u53d8\u91cf\uff0c\u7136\u540e\u901a\u8fc7 <code>array_contains()<\/code>\u51fd\u6570 \u5224\u5b9a\u7279\u5b9a\u7684key\u662f\u5426\u5728\u521a\u624d\u7684\u6570\u7ec4\u91cc\u9762\uff1b<br>\u65b9\u6cd5\u4e8c\uff1a\u76f4\u63a5\u7528\u7279\u5b9akey\u53bb\u53d6\u503c\uff0c\u8fd4\u56de\u4e3a NULL \u5219\u53ef\u80fd\u662f\u4e0d\u5305\u542b\uff08\u8fd8\u6709\u4e00\u79cd\u53ef\u80fd\u662fkey\u5b58\u5728\uff0c\u4f46\u662fvalue\u5c31\u662fNULL\uff09\uff0c\u4e0d\u8fc7\u8fd9\u79cd\u65b9\u6cd5\u4e0d\u7cbe\u786e\u3002<\/li>\n<\/ol>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p>\u7528\u4e34\u65f6\u8868\u521b\u5efa\u51e0\u4e2amap\u53d8\u91cf\u7528\u4e8e\u6d4b\u8bd5\u548cmap\u7c7b\u578b\u6570\u636e\u6709\u5173\u7684\u5e38\u7528\u65b9\u6cd5\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>with t1 as (SELECT\nstr_to_map(\"name:zhangsan,age:25\") as m1\n,str_to_map(\"name:lisi,age:23\") as m2\n,map(\"name\",\"wangwu\",\"age\",\"24\") as m3\n,map('K1','V1','K2','V2','K3','V3') as m4\n)\n\nSELECT\nm1, size(m1) as m1_size \n,m2, m2&#91;'name'] as m2_name\n,m3, m3&#91;'name1'] as m3_name1\n,m4\n,map_keys(m4) as m4_keys\n,array_contains(map_keys(m4),'k4') as m4_has_k4\n,m4&#91;'k4'] as m4_k4\n,map_values(m4) as m4_values\nfrom t1<\/code><\/pre>\n\n\n\n<p>\u4e0a\u9762\u7684SQL\u7684\u67e5\u8be2\u7ed3\u679c\u5982\u4e0b\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>{\"age\":\"25\",\"name\":\"zhangsan\"}\n2\n{\"age\":\"23\",\"name\":\"lisi\"}\nlisi\n{\"age\":\"24\",\"name\":\"wangwu\"}\nNULL\n{\"K1\":\"V1\",\"K2\":\"V2\",\"K3\":\"V3\"}\n&#91;\"K1\",\"K2\",\"K3\"]\nfalse\nNULL\n&#91;\"V1\",\"V2\",\"V3\"]<\/code><\/pre>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p>\u4e00\u4e2a\u5b9e\u9645\u7684\u573a\u666f\u4e3e\u4f8b\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>\/*\nurl_params \u662f map \u7c7b\u578b\u7684\u53d8\u91cf\uff0c\u73b0\u5728\u60f3\u7edf\u8ba1\u4e00\u4e0b\u8bf7\u6c42\u53c2\u6570\u91cc\u9762\u7684 key \u90fd\u6709\u54ea\u4e9b\uff0c\u5bf9\u5e94\u7684 value \u53d6\u503c\u90fd\u662f\u4ec0\u4e48\u7279\u70b9\uff08\u5b57\u7b26\u4e32\u957f\u5ea6\u3001\u53d6\u503c\u8303\u56f4\u7b49\uff09\n\u56e0\u6b64\u9700\u8981\u501f\u52a9 explode \u8fdb\u884c\u5207\u5206\uff0c\u7136\u540e\u7edf\u8ba1\n*\/\nSELECT\nurl_path\n,map_keys(url_params) as params_key_arr\n,params_key,params_value\nFROM    security.nginx_access_log\nlateral view explode(url_params) kv as params_key,params_value\nWHERE   date BETWEEN '20240821' AND '20240821'\nAND http_host = 'domain1.ixyzero.com'\nAND size(url_params) > 0 \nLIMIT   100\n\n\n\/*\n\u501f\u52a9 lateral view explode \u5c06 map \u7c7b\u578b\u53d8\u91cf url_params \u5207\u5206\u6210 params_key \u548c params_value \u4e24\u90e8\u5206\uff1b\n\u7136\u540e\u5bf9 params_key \u8fdb\u884c group by \u7edf\u8ba1\u603b\u6b21\u6570\uff08\u9700\u8981\u7528\u6bcf\u4e00\u6761\u65e5\u5fd7\u7684logid\u505a\u53bb\u91cd\u7edf\u8ba1\uff0c\u5426\u5219\u6570\u503c\u4f1a\u88ab\u653e\u5927\u5f88\u591a\u500d\uff09\uff0c\n\u540c\u6b65\u7edf\u8ba1 params_value \u7684\u5b57\u7b26\u4e32\u957f\u5ea6\u7b49\u4fe1\u606f\n*\/\nSELECT\nparams_key\n,count(DISTINCT req_logid) as logid_cnt\n,count(DISTINCT params_value) as params_value_cnt\n,count(DISTINCT url_path) as url_path_cnt\n,sort_array(collect_set(length(params_value))) as params_value_len_set\n,concat(cast(min(length(params_value)) as STRING),'-',cast(max(length(params_value)) as STRING)) as params_value_len_range\n,sort_array(collect_set(length(refer_uid))) as refer_uid_len_set\n,concat(cast(min(length(refer_uid)) as STRING),'-',cast(max(length(refer_uid)) as STRING)) as refer_uid_len_range\nfrom\n(select\nurl_path\n,req_logid\n,params_key,params_value\n,regexp_extract(referer_url, '(\\\\d{8,})', 0) as refer_uid\nFROM    security.nginx_access_log\nlateral view explode(url_params) kv as params_key,params_value\nWHERE   date BETWEEN '20240821' AND '20240821'\nAND http_host = 'domain1.ixyzero.com'\nAND size(url_params) > 0 \n)tmp\nGROUP BY params_key<\/code><\/pre>\n\n\n\n<h5 class=\"wp-block-heading\">\u53c2\u8003\u94fe\u63a5\uff1a<\/h5>\n\n\n\n<p>Hive\u4e2dMap\u51fd\u6570\u7684\u5e94\u7528<br><a href=\"https:\/\/yerias.github.io\/2021\/06\/03\/hive\/25\/\">https:\/\/yerias.github.io\/2021\/06\/03\/hive\/25\/<\/a><\/p>\n\n\n\n<p>Hive\u4e2d\u7684\u96c6\u5408\u6570\u636e\u7c7b\u578b<br><a href=\"https:\/\/www.ikeguang.com\/article\/1611\">https:\/\/www.ikeguang.com\/article\/1611<\/a><\/p>\n\n\n\n<p>Hive\u6570\u636e\u7c7b\u578b[\u901a\u4fd7\u6613\u61c2]<br><a href=\"https:\/\/cloud.tencent.cn\/developer\/article\/2064320\">https:\/\/cloud.tencent.cn\/developer\/article\/2064320<\/a><\/p>\n\n\n\n<p>hive \u4f7f\u7528 map \u7c7b\u578b\u5b57\u6bb5<br><a href=\"https:\/\/cloud.tencent.com\/developer\/article\/1846124\">https:\/\/cloud.tencent.com\/developer\/article\/1846124<\/a><\/p>\n\n\n\n<p>Hive \u6570\u636e\u7c7b\u578b<br><a href=\"https:\/\/www.hadoopdoc.com\/hive\/hive-data-type\">https:\/\/www.hadoopdoc.com\/hive\/hive-data-type<\/a><\/p>\n\n\n\n<p>Hive\u4e4bMap\u5e38\u7528\u65b9\u6cd5<br><a href=\"https:\/\/blog.csdn.net\/weixin_43597208\/article\/details\/126962317\">https:\/\/blog.csdn.net\/weixin_43597208\/article\/details\/126962317<\/a><\/p>\n\n\n\n<p>=END=<\/p>\n","protected":false},"excerpt":{"rendered":"<p>=Start= \u7f18\u7531\uff1a Hive\u652f\u6301\u539f\u59cb\u6570\u636e\u7c7b\u578b\u548c\u590d\u6742\u7c7b\u578b\uff0c\u539f\u59cb\u7c7b\u578b\u5305\u62ec\u6570\u503c\u578b\uff0cBoolean\uff0c\u5b57\u7b26\u4e32\uff0c\u65f6\u95f4\u6233 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[14,23,7],"tags":[1886,1261,342,2319,2320,2318,1262,2321],"class_list":["post-5733","post","type-post","status-publish","format-standard","hentry","category-database","category-knowledgebase-2","category-programing","tag-array_contains","tag-hive","tag-map","tag-map_keys","tag-map_values","tag-size","tag-sql","tag-str_to_map"],"views":2232,"_links":{"self":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/5733","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/comments?post=5733"}],"version-history":[{"count":1,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/5733\/revisions"}],"predecessor-version":[{"id":5734,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/5733\/revisions\/5734"}],"wp:attachment":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/media?parent=5733"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/categories?post=5733"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/tags?post=5733"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}