{"id":5806,"date":"2024-12-13T12:05:03","date_gmt":"2024-12-13T04:05:03","guid":{"rendered":"https:\/\/ixyzero.com\/blog\/?p=5806"},"modified":"2024-12-13T12:05:03","modified_gmt":"2024-12-13T04:05:03","slug":"hive-sql%e7%9a%84%e4%b8%80%e4%ba%9b%e5%87%bd%e6%95%b0%e7%9a%84%e4%bd%bf%e7%94%a8%e5%ad%a6%e4%b9%a0","status":"publish","type":"post","link":"https:\/\/ixyzero.com\/blog\/archives\/5806.html","title":{"rendered":"Hive SQL\u7684\u4e00\u4e9b\u51fd\u6570\u7684\u4f7f\u7528\u5b66\u4e60"},"content":{"rendered":"\n<p>=Start=<\/p>\n\n\n\n<h4 class=\"wp-block-heading\">\u7f18\u7531\uff1a<\/h4>\n\n\n\n<p>\u770b\u5230\u4e00\u5904\u4e0d\u9519\u7684\u8bbe\u8ba1\uff0c\u7b80\u5355\u6574\u7406\u5b66\u4e60\u4e00\u4e0b\u5176\u4e2d\u6211\u8fd8\u4e0d\u592a\u719f\u6089\u7684\u4e00\u4e9b\u77e5\u8bc6\u70b9\uff0c\u65b9\u4fbf\u540e\u9762\u6709\u9700\u8981\u7684\u65f6\u5019\u53ef\u4ee5\u501f\u9274\u548c\u53c2\u8003\u3002<\/p>\n\n\n\n<h4 class=\"wp-block-heading\">\u6b63\u6587\uff1a<\/h4>\n\n\n\n<h5 class=\"wp-block-heading\">\u53c2\u8003\u89e3\u7b54\uff1a<\/h5>\n\n\n\n<h4 class=\"wp-block-heading\">\u59cb\u4e8e\u5373\u5e2d\u67e5\u8be2SQL\u8bed\u53e5\u7684\u5206\u6790<\/h4>\n\n\n\n<p>\u4ece\u5e93\u8868\u5b57\u6bb5\u7684\u521b\u5efa\u5f00\u59cb\uff0c\u60f3\u7740\u540e\u9762\u4f7f\u7528\u7684\u65f6\u5019\u600e\u4e48\u4f1a\u66f4\u65b9\u4fbf<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>step1. \u5efa\u8868-\u7ed9\u5b57\u6bb5\u8bbe\u5b9a\u7c7b\u578b\nCREATE TABLE IF NOT EXISTS datasec.adhoc_sql_parse (\n...\n,user_id_info  ARRAY &lt; STRING &gt;  COMMENT 'user_id_info'\n...\n)\n\nstep2. \u67e5\u8be2\u540e\u586b\u5145-\u7ed9\u5b57\u6bb5\u586b\u5145\u5185\u5bb9\n,sort_array(collect_set(case when nickname is null then null else to_json(named_struct('user_id',uid, 'nickname',nickname, 'user_type',user_type, 'is_secret',is_secret, 'status',status)) end)) as user_id_info\n\nstep3. \u4f7f\u7528\nSELECT\nget_json_object(uid_info1,'$.user_id') as uid,\nget_json_object(uid_info1,'$.nickname') as nickname,\nget_json_object(uid_info1,'$.user_type') as user_type,\nget_json_object(uid_info1,'$.is_secret') as is_secret,\nget_json_object(uid_info1,'$.status') as status,\nuid_info1,\nuser_id_info,\ndate\nFROM\n  datasec.adhoc_sql_parse\nlateral view explode(user_id_info) uid_info as uid_info1\nWHERE\n  date BETWEEN '20241201' AND '20241201'\n  and get_json_object(uid_info1,'$.user_id') not in ('10000001','10000000001')\n  and (\n    query_table rlike 'gmv|pay|order|amount|price|address|mobile|phone|salary|income'\n  )\n  and (\n    (user_id_info is not NULL and size(user_id_info) &gt;0)\n  )<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">\u5b66\u4e60\u5176\u4e2d\u6d89\u53ca\u7684Hive SQL\u51fd\u6570<\/h4>\n\n\n\n<h5 class=\"wp-block-heading\">named_struct \u51fd\u6570<\/h5>\n\n\n\n<p><strong>\u521b\u5efa\u5177\u6709\u6307\u5b9a\u7684\u5b57\u6bb5\u540d\u79f0(Key)\u548c\u503c(Value)\u7684\u7ed3\u6784(struct)<\/strong>\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>&gt; SELECT named_struct('a', 1, 'b', 2, 'c', 3);\n {\"a\":1,\"b\":2,\"c\":3}<\/code><\/pre>\n\n\n\n<h5 class=\"wp-block-heading\">to_json \u51fd\u6570<\/h5>\n\n\n\n<p><strong>\u5c06 Hive \u7ed3\u6784\uff08\u4f8b\u5982\uff1aLIST\uff0cMAP\uff0cNAMED_STRUCT \u7b49\uff09\u8f6c\u6362\u6210\u4e3a JSON\u5b57\u7b26\u4e32<\/strong>\uff0c\u4e00\u822c\u4f1a\u914d\u5408\u53e6\u4e00\u4e2a named_struct()\u51fd\u6570\uff08\u81ea\u5b9a\u4e49\u7ed3\u6784\u5316\u6570\u636e\u7684KV\uff09\u4e00\u8d77\u4f7f\u7528<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>##### \u8bed\u6cd5\n\nto_json(expr &#91;, options] )\n\n##### \u53c2\u6570\n* expr\uff1aSTRUCT \u8868\u8fbe\u5f0f\uff0c\u6216\u8005 Databricks SQL \u548c Databricks Runtime 15.3 \u53ca\u66f4\u9ad8\u7248\u672c\u4e2d\u7684 VARIANT\u3002\n* options\uff1a\u4e00\u4e2a\u53ef\u9009\u7684 MAP \u6587\u672c\u8868\u8fbe\u5f0f\uff0c\u5176\u952e\u548c\u503c\u4e3a STRING\u3002 \u5982\u679c expr \u662f VARIANT\uff0c\u5219\u5ffd\u7565\u8fd9\u4e9b\u9009\u9879\u3002\n\n##### \u8fd4\u56de\n\u4e00\u4e2a STRING\u3002\n\n##### \u793a\u4f8b\n\n&gt; SELECT to_json(named_struct('a', 1, 'b', 2));\n {\"a\":1,\"b\":2}\n\n&gt; SELECT to_json(named_struct('time', to_timestamp('2015-08-26', 'yyyy-MM-dd')), map('timestampFormat', 'dd\/MM\/yyyy'));\n {\"time\":\"26\/08\/2015\"}\n\n&gt; SELECT to_json(array(named_struct('a', 1, 'b', 2)));\n &#91;{\"a\":1,\"b\":2}]\n\n&gt; SELECT to_json(map('a', named_struct('b', 1)));\n {\"a\":{\"b\":1}}\n\n&gt; SELECT to_json(map(named_struct('a', 1),named_struct('b', 2)));\n {\"&#91;1]\":{\"b\":2}}\n\n&gt; SELECT to_json(map('a', 1));\n {\"a\":1}\n\n&gt; SELECT to_json(array((map('a', 1))));\n &#91;{\"a\":1}]\n\n-- VARIANT input\n&gt; SELECT to_json(parse_json('{\"key\": 123, \"data\": &#91;4, 5, \"str\"]}'))\n  {\"data\":&#91;4,5,\"str\"],\"key\":123}<\/code><\/pre>\n\n\n\n<h5 class=\"wp-block-heading\">Hive\u4e2d\u5e38\u7528\u4e8ejson\u683c\u5f0f\u5b57\u7b26\u4e32\u5904\u7406\u7684\u51fd\u6570<\/h5>\n\n\n\n<pre class=\"wp-block-code\"><code>1. string\u7c7b\u578b -&gt; json\n* named_struct: \u751f\u6210key\u548cvalue\u7684struct\u7ed3\u6784\uff0c\u4f20\u9012\u53c2\u6570\u4e3a named_struct(key1,value1,key2,value2, ..., keyN,valueN)\n* to_json: \u5c06\u5404\u79cd\u590d\u6742\u7ed3\u6784\uff08\u4f8b\u5982\uff1aLIST\uff0cMAP\uff0cNAMED_STRUCT \u7b49\uff09\u8f6c\u6362\u6210json\u683c\u5f0f\n\nselect to_json(named_struct('a', 1, 'b', 2));\n\n2. map\u7c7b\u578b -&gt; json\n* str_to_map \u53ef\u4ee5\u5148\u5c06string\u5207\u5272\u6210map\uff0c\u518d\u8f6cjson\n* to_json: \u5c06\u5404\u79cd\u590d\u6742\u7ed3\u6784\uff08\u4f8b\u5982\uff1aLIST\uff0cMAP\uff0cNAMED_STRUCT \u7b49\uff09\u8f6c\u6362\u6210json\u683c\u5f0f\n\nselect to_json(str_to_map(\"aaaa_-100#bbbb_\u9886\u523829\u51cf8#CCCC_29\u5206\u949f#CCCC_50\u5206\u949f\",'#','_')) as json_test\n\nHive\u4e2d\u5904\u7406json\u6570\u636e\u7684\u4e24\u79cd\u65b9\u5f0f\n\n\u7b2c\u4e00\u79cd\uff1a\u5c06json\u6570\u636e\u4f5c\u4e3a\u5b57\u7b26\u4e32\u8fdb\u884c\u5904\u7406\n* get_json_object - \u4e00\u6b21\u53d6\u4e00\u4e2a\u5b57\u6bb5\n* json_tuple - \u4e00\u6b21\u53d6\u591a\u4e2a\u5b57\u6bb5\n\n\u7b2c\u4e8c\u79cd\uff1a\u901a\u8fc7\u4e13\u95e8\u7684\u89e3\u6790\u7c7b\u76f4\u63a5\u52a0\u8f7d\u4e00\u4e2ajson\u683c\u5f0f\u7684\u6570\u636e\u5230Hive\u4e2d<\/code><\/pre>\n\n\n\n<h5 class=\"wp-block-heading\">\u53c2\u8003\u94fe\u63a5\uff1a<\/h5>\n\n\n\n<p>Azure Databricks SQL \u5185\u7f6e\u51fd\u6570 #nice<br><a href=\"https:\/\/learn.microsoft.com\/zh-cn\/azure\/databricks\/sql\/language-manual\/sql-ref-functions-builtin\">https:\/\/learn.microsoft.com\/zh-cn\/azure\/databricks\/sql\/language-manual\/sql-ref-functions-builtin<\/a><\/p>\n\n\n\n<p>to_json \u51fd\u6570<br><a href=\"https:\/\/learn.microsoft.com\/zh-cn\/azure\/databricks\/sql\/language-manual\/functions\/to_json\">https:\/\/learn.microsoft.com\/zh-cn\/azure\/databricks\/sql\/language-manual\/functions\/to_json<\/a><\/p>\n\n\n\n<p>Tencent-\u65e5\u5fd7\u670d\u52a1\uff08Cloud Log Service\uff0cCLS\uff09-&gt;\u7edf\u8ba1\u5206\u6790\uff08SQL\uff09&gt;SQL \u51fd\u6570<br><a href=\"https:\/\/cloud.tencent.com\/document\/product\/614\/44063\">https:\/\/cloud.tencent.com\/document\/product\/614\/44063<\/a><\/p>\n\n\n\n<p>alibabacloud-\u65e5\u5fd7\u670d\u52a1SLS\u662f\u4e91\u539f\u751f\u89c2\u6d4b\u4e0e\u5206\u6790\u5e73\u53f0-&gt;SQL\u51fd\u6570<br><a href=\"https:\/\/www.alibabacloud.com\/help\/zh\/sls\/user-guide\/sql-function\/\">https:\/\/www.alibabacloud.com\/help\/zh\/sls\/user-guide\/sql-function\/<\/a><\/p>\n\n\n\n<p>hiveSql\u51b7\u95e8\u4f46\u597d\u7528\u51fd\u6570 &#8211;\u6301\u7eed\u66f4\u65b0<br><a href=\"https:\/\/blog.csdn.net\/i_mycode\/article\/details\/128107201\">https:\/\/blog.csdn.net\/i_mycode\/article\/details\/128107201<\/a><\/p>\n\n\n\n<p>Hive\u2013map\/string\u2192json&amp;\u5904\u7406Json\u6570\u636e\u7684\u51fd\u6570 #nice<br><a href=\"https:\/\/blog.csdn.net\/qq_46893497\/article\/details\/109959763\">https:\/\/blog.csdn.net\/qq_46893497\/article\/details\/109959763<\/a><\/p>\n\n\n\n<p>Converting data from multiple Hive Tables to Complex JSON<br><a href=\"https:\/\/stackoverflow.com\/questions\/25188734\/converting-data-from-multiple-hive-tables-to-complex-json\">https:\/\/stackoverflow.com\/questions\/25188734\/converting-data-from-multiple-hive-tables-to-complex-json<\/a><\/p>\n\n\n\n<p>Hive and JSON made simple<br><a href=\"https:\/\/brickhouseconfessions.wordpress.com\/2014\/02\/07\/hive-and-json-made-simple\/\">https:\/\/brickhouseconfessions.wordpress.com\/2014\/02\/07\/hive-and-json-made-simple\/<\/a><\/p>\n\n\n\n<p>Hive Operators and User-Defined Functions (UDFs)<br><a href=\"https:\/\/cwiki.apache.org\/confluence\/display\/hive\/languagemanual+udf#LanguageManualUDF-json_tuple\">https:\/\/cwiki.apache.org\/confluence\/display\/hive\/languagemanual+udf#LanguageManualUDF-json_tuple<\/a><\/p>\n\n\n\n<p>Hive\u4e4b\u51fd\u6570\u89e3\u6790<br><a href=\"https:\/\/www.cnblogs.com\/OnePandas\/p\/17953263\">https:\/\/www.cnblogs.com\/OnePandas\/p\/17953263<\/a><\/p>\n\n\n\n<p>=END=<\/p>\n","protected":false},"excerpt":{"rendered":"<p>=Start= \u7f18\u7531\uff1a \u770b\u5230\u4e00\u5904\u4e0d\u9519\u7684\u8bbe\u8ba1\uff0c\u7b80\u5355\u6574\u7406\u5b66\u4e60\u4e00\u4e0b\u5176\u4e2d\u6211\u8fd8\u4e0d\u592a\u719f\u6089\u7684\u4e00\u4e9b\u77e5\u8bc6\u70b9\uff0c\u65b9\u4fbf\u540e\u9762\u6709\u9700\u8981\u7684\u65f6\u5019 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[14,23],"tags":[2389,1261,181,342,2390,1262,2321,19,1887],"class_list":["post-5806","post","type-post","status-publish","format-standard","hentry","category-database","category-knowledgebase-2","tag-array","tag-hive","tag-json","tag-map","tag-named_struct","tag-sql","tag-str_to_map","tag-tips","tag-to_json"],"views":1019,"_links":{"self":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/5806","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/comments?post=5806"}],"version-history":[{"count":1,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/5806\/revisions"}],"predecessor-version":[{"id":5807,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/5806\/revisions\/5807"}],"wp:attachment":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/media?parent=5806"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/categories?post=5806"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/tags?post=5806"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}