{"id":5798,"date":"2024-12-04T20:01:00","date_gmt":"2024-12-04T12:01:00","guid":{"rendered":"https:\/\/ixyzero.com\/blog\/?p=5798"},"modified":"2024-12-04T14:45:24","modified_gmt":"2024-12-04T06:45:24","slug":"hive-sql%e5%a6%82%e4%bd%95%e4%bb%8ejson%e6%a0%bc%e5%bc%8f%e7%9a%84%e5%ad%97%e7%ac%a6%e4%b8%b2%e4%b8%ad%e6%8f%90%e5%8f%96%e5%87%ba%e6%89%80%e6%9c%89key%e5%ad%97%e6%ae%b5","status":"publish","type":"post","link":"https:\/\/ixyzero.com\/blog\/archives\/5798.html","title":{"rendered":"Hive SQL\u5982\u4f55\u4ecejson\u683c\u5f0f\u7684\u5b57\u7b26\u4e32\u4e2d\u63d0\u53d6\u51fa\u6240\u6709key\u5b57\u6bb5"},"content":{"rendered":"\n<p>=Start=<\/p>\n\n\n\n<h4 class=\"wp-block-heading\">\u7f18\u7531\uff1a<\/h4>\n\n\n\n<p>\u5728\u7528Hive SQL\u505a\u6570\u636e\u5206\u6790\u7684\u8fc7\u7a0b\u4e2d\u9047\u5230\u7684\u4e00\u4e2a\u60c5\u51b5\uff0c\u89c9\u5f97\u633a\u5178\u578b\u7684\uff0c\u540e\u9762\u53ef\u80fd\u4f1a\u9047\u5230\uff0c\u6240\u4ee5\u8bb0\u5f55\u4e0b\u6765\uff0c\u65b9\u4fbf\u540e\u9762\u53c2\u8003\u3002<\/p>\n\n\n\n<h4 class=\"wp-block-heading\">\u6b63\u6587\uff1a<\/h4>\n\n\n\n<h5 class=\"wp-block-heading\">\u53c2\u8003\u89e3\u7b54\uff1a<\/h5>\n\n\n\n<h4 class=\"wp-block-heading\">\u6d4b\u8bd5\u6837\u4f8b<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code># file_schema (\u7c7b\u578b1\uff0cstring\u7c7b\u578b\u7684\u5217\u540d\u5217\u8868\uff0c\u7528\u65b9\u62ec\u53f7\u5305\u88f9\u8d77\u6765\uff0c\u4e0d\u9700\u8981\u989d\u5916\u5904\u7406)\n&#91;\"\u4f01\u4e1a\u53f7uid\",\"\u516c\u53f8\u540d\u79f0\"]\n\n# file_schema (\u7c7b\u578b2\uff0cstring\u7c7b\u578b\u5b57\u6bb5\uff0cjson\u5b57\u7b26\u4e32\u683c\u5f0f\uff0c\u9700\u8981\u5c06\u6240\u6709\u7684key\u90fd\u63d0\u53d6\u51fa\u6765)\n{\"activity_type\":\"string\",\"business_tag\":\"string\",\"activity_tags\":\"string\",\"campaign_type\":\"string\",\"tool_name\":\"string\"}\n\n\n-- \u63d0\u53d6\u7ed3\u679c\n# file_columns1\nactivity_type,business_tag,activity_tags,campaign_type,tool_name\n\n# 
file_columns2\n\"activity_type\",\"business_tag\",\"activity_tags\",\"campaign_type\",\"tool_name\"\n\n# file_columns3\n{\"activity_type\",\"business_tag\",\"activity_tags\",\"campaign_type\",\"tool_name\"}\n\n# file_columns4\n\"activity_type\",\"business_tag\",\"activity_tags\",\"campaign_type\",\"tool_name\"\n\n-- \u63d0\u53d6\u65b9\u6cd5\uff08\u4e3a\u4e86\u663e\u793a\u6e05\u695a\u4f7f\u7528case-when\u8bed\u6cd5\uff0c\u5176\u5b9e\u53ef\u4ee5\u7528\u5355\u884c\u7684if()\u51fd\u6570\u6765\u8fbe\u5230\u76ee\u6807\uff09\nfile_schema,\ncase\nwhen file_schema like '{%' then concat_ws(',',map_keys(str_to_map(replace(substr(file_schema,2),'\"',''))))\nelse file_schema\nend as file_columns1,\ncase\nwhen file_schema like '{%' then concat_ws(',',map_keys(str_to_map(substr(file_schema,2))))\nelse file_schema\nend as file_columns2,\ncase\nwhen file_schema like '{%' then regexp_replace(file_schema,':\"(bigint|tinyint|smallint|int|string|varchar|char|float|double|boolean|decimal|timestamp)\"','')\nelse file_schema\nend as file_columns3,\nregexp_replace(concat_ws(',',map_keys(str_to_map(regexp_replace(file_schema,'\\\\(&#91;a-zA-Z0-9]+,&#91;a-zA-Z0-9]+\\\\)','')))),'\\\\{','') AS file_columns4,<\/code><\/pre>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<pre class=\"wp-block-code\"><code>str_to_map(\u5b57\u7b26\u4e32\u53c2\u6570, \u5206\u9694\u7b261, \u5206\u9694\u7b262)\n\u4f7f\u7528\u4e24\u4e2a\u5206\u9694\u7b26\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u952e\u503c\u5bf9\u3002\n\u5206\u9694\u7b261\u5c06\u6587\u672c\u5206\u6210K-V\u5bf9\uff0c\u5206\u9694\u7b262\u5206\u5272\u6bcf\u4e2aK-V\u5bf9\u3002\u5bf9\u4e8e\u5206\u9694\u7b261\u9ed8\u8ba4\u5206\u9694\u7b26\u662f ','\uff0c\u5bf9\u4e8e\u5206\u9694\u7b262\u9ed8\u8ba4\u5206\u9694\u7b26\u662f ':'\n\n&gt; select str_to_map(\"name:zhangsan,age:25\") as m1\n{\"age\":\"25\",\"name\":\"zhangsan\"}\n\n&gt; desc function str_to_map;\nFunction: str_to_map\nClass: 
org.apache.spark.sql.catalyst.expressions.StringToMap\nUsage: str_to_map(text&#91;, pairDelim&#91;, keyValueDelim]]) - Creates a map after splitting the text into key\/value pairs using delimiters. Default delimiters are ',' for `pairDelim` and ':' for `keyValueDelim`. Both `pairDelim` and `keyValueDelim` are treated as regular expressions.\n\n\n&gt; select cast(str_to_map(\"A:1,B:1,C:1\") as map&lt;string, int&gt;)\n-- {'A': 1, 'B': 1, 'C': 1}<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">\u76f8\u5173\u89e3\u91ca<\/h4>\n\n\n\n<p>\u4e0b\u8f7d\u6587\u4ef6\u5217\u540d(file_schema)\u5b57\u6bb5\u7684\u5206\u6790\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u53ea\u6709\u5728\u4e0b\u8f7d\u683c\u5f0f\u4e3aExcel\u7684\u65f6\u5019\uff0c\u624d\u4f1a\u662f\u4e0b\u9762\u8fd9\u79cd\u683c\u5f0f\uff1a<br><code>{\"column1\":\"bigint\",\"column2\":\"int\",\"column3\":\"double\"}<\/code><\/li>\n\n\n\n<li>\u5176\u5b83\u7684\u60c5\u51b5\u4e0b\uff0c\u5c31\u662f\u5355\u7eaf\u7684\u5217\u540d\u683c\u5f0f\uff08\u5df2\u5b8c\u5168\u6ee1\u8db3\u9700\u6c42\uff0c\u4e0d\u7528\u989d\u5916\u5904\u7406\uff09\uff1a<br><code>[\"column1\",\"column2\",\"column3\",\"column4\"]<\/code><\/li>\n<\/ol>\n\n\n\n<p>Hive SQL\u4e2d\u7684\u4e00\u4e9b\u51fd\u6570\u529f\u80fd\u3001\u4f7f\u7528\u573a\u666f\u5b66\u4e60<\/p>\n\n\n\n<p><strong><mark style=\"background-color:rgba(0, 0, 0, 0);color:#ff0000\" class=\"has-inline-color\">str_to_map\u51fd\u6570\u7684\u7b2c\u4e00\u4e2a\u53c2\u6570\u5b57\u7b26\u4e32\u662f\u4e0d\u4ee5\u62ec\u53f7\u5305\u88f9\u7684<\/mark><\/strong>\uff0c\u5c31\u662f\u5355\u7eaf\u7684\uff1a<br><code>key1:value1,key2:value2<\/code><br>\u8fd9\u79cd\u5f62\u5f0f\u3002<\/p>\n\n\n\n<p>\u5f53\u524d\u78b0\u5230\u7684\u4e00\u4e2a\u5b57\u7b26\u4e32\u662f\uff1a<br><code>{\"column1\":\"double\",\"column2\":\"int\",\"column3\":\"double\"}<\/code><br>\u8fd9\u79cd\u5f62\u5f0f\u3002<\/p>\n\n\n\n<h5 
class=\"wp-block-heading\">\u65b9\u6cd5\u4e00\uff08\u66f4\u76f4\u63a5\uff09<\/h5>\n\n\n\n<p>\u5206\u6790\u4e86\u4e00\u4e0b\uff0cvalue\u7684\u53d6\u503c\u8303\u56f4\u7edd\u5927\u90e8\u5206\u662fHive\u652f\u6301\u7684\u539f\u59cb\u6570\u636e\u7c7b\u578b\uff08\u6570\u503c\u578b\uff0cBoolean\uff0c\u5b57\u7b26\u4e32\uff0c\u65f6\u95f4\u6233\uff09\uff0c\u6240\u4ee5\u6709\u4e00\u79cd\u65b9\u5f0f\u5c31\u662f<strong>\u628a\u8fd9\u4e9b\u76f8\u5bf9\u56fa\u5b9a\uff08\u53ef\u4ee5\u679a\u4e3e\u51fa\u6765\uff09\u7684\u5185\u5bb9\u8fdb\u884c\u6279\u91cf\u66ff\u6362<\/strong>\uff0c\u5c31\u53ef\u4ee5\u8fbe\u5230\u6211\u7684\u76ee\u6807\u2014\u2014\u53ea\u4fdd\u7559\u952e\u540dkey\u5217\u8868\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>if(file_schema like '{%' , regexp_replace(file_schema,':\"(bigint|tinyint|smallint|int|string|varchar|char|float|double|boolean|decimal|timestamp)\"','') , file_schema) as file_columns<\/code><\/pre>\n\n\n\n<h5 class=\"wp-block-heading\">\u65b9\u6cd5\u4e8c\uff08\u66f4\u901a\u7528\uff09<\/h5>\n\n\n\n<p>\u4e0a\u9762\u8fd9\u79cd\u60c5\u51b5\u6bd4\u8f83\u7279\u6b8a\u2014\u2014value\u7684\u53d6\u503c\u662f\u53ef\u4ee5\u679a\u4e3e\u51fa\u6765\uff0c\u6709\u76f8\u5bf9\u56fa\u5b9a\u8303\u56f4\u7684\u3002\u9664\u6b64\u4e4b\u5916\uff0c<strong>\u5bf9\u4e8evalue\u53d6\u503c\u65e0\u6cd5\u9884\u4f30\u7684\u60c5\u51b5\u4e0b\uff0c\u53ef\u4ee5\u4f7f\u7528\u53e6\u4e00\u4e2a\u66f4\u4e3a\u901a\u7528\u7684\u65b9\u6cd5<\/strong>\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u5728\u7b80\u5355\u628a\u5b57\u7b26\u4e32\u5904\u7406\u4e4b\u540e\uff0c<strong><mark style=\"background-color:rgba(0, 0, 0, 0);color:#ff0000\" class=\"has-inline-color\">\u7528 str_to_map \u51fd\u6570\u5c06\u5b57\u7b26\u4e32\u8f6c\u6362\u6210map\u7c7b\u578b<\/mark><\/strong><\/li>\n\n\n\n<li>\u7136\u540e\u4f7f<strong><mark style=\"background-color:rgba(0, 0, 0, 0);color:#ff0000\" class=\"has-inline-color\">\u7528 map_keys 
\u51fd\u6570\u53d6\u51fa\u952e\u540dkey\u5217\u8868<\/mark><\/strong><\/li>\n\n\n\n<li>\u518d\u7528 concat_ws \u5c06array\u7c7b\u578b\u8f6c\u6362\u6210string\u7c7b\u578b\u5373\u53ef<\/li>\n\n\n\n<li>\uff08\u53ef\u9009\uff09\u4f7f\u7528 replace\/regexp_replace \u51fd\u6570\u5bf9\u5b57\u7b26\u4e32\u505a\u4e00\u4e9b\u683c\u5f0f\u5316\u5904\u7406<\/li>\n<\/ol>\n\n\n\n<pre class=\"wp-block-code\"><code>if(file_schema like '{%' , concat_ws(',',map_keys(str_to_map(replace(substr(file_schema,2),'\"','')))) , file_schema) as file_columns<\/code><\/pre>\n\n\n\n<h5 class=\"wp-block-heading\">\u53c2\u8003\u94fe\u63a5\uff1a<\/h5>\n\n\n\n<p>Hive Tutorial<br><a href=\"https:\/\/cwiki.apache.org\/confluence\/display\/Hive\/Tutorial\">https:\/\/cwiki.apache.org\/confluence\/display\/Hive\/Tutorial<\/a><\/p>\n\n\n\n<p>Hive str_to_map\u51fd\u6570<br><a href=\"https:\/\/www.cnblogs.com\/kopao\/p\/13753560.html\">https:\/\/www.cnblogs.com\/kopao\/p\/13753560.html<\/a><\/p>\n\n\n\n<p>\u5927\u6570\u636e\u4e4bHive\uff1aHive\u51fd\u6570\u4e4bstr_to_map\u51fd\u6570<br><a href=\"https:\/\/blog.csdn.net\/weixin_43597208\/article\/details\/117450579\">https:\/\/blog.csdn.net\/weixin_43597208\/article\/details\/117450579<\/a><\/p>\n\n\n\n<p>str_to_map returns map&lt;string,string&gt;. 
How to make it return map&lt;string,int&gt;?<br><a href=\"https:\/\/stackoverflow.com\/questions\/38537382\/str-to-map-returns-mapstring-string-how-to-make-it-return-mapstring-int\">https:\/\/stackoverflow.com\/questions\/38537382\/str-to-map-returns-mapstring-string-how-to-make-it-return-mapstring-int<\/a><\/p>\n\n\n\n<p>str_to_map function<br><a href=\"https:\/\/docs.databricks.com\/en\/sql\/language-manual\/functions\/str_to_map.html\">https:\/\/docs.databricks.com\/en\/sql\/language-manual\/functions\/str_to_map.html<\/a><\/p>\n\n\n\n<p>Hive \u4e2d map \u7c7b\u578b\u5b57\u6bb5\u7684\u5e38\u7528\u65b9\u6cd5\u5b66\u4e60<br><a href=\"https:\/\/ixyzero.com\/blog\/archives\/5733.html\">https:\/\/ixyzero.com\/blog\/archives\/5733.html<\/a><\/p>\n\n\n\n<p>hive\u89e3\u6790\u4e0d\u786e\u5b9akey\u7684json\uff0c\u5c06key\u548cvalue\u5206\u522b\u653e\u5165\u4e24\u5217<br><a href=\"https:\/\/blog.csdn.net\/lz6363\/article\/details\/103910313\">https:\/\/blog.csdn.net\/lz6363\/article\/details\/103910313<\/a><\/p>\n\n\n\n<p>Hive \u4ecejson\u4e2d\u63d0\u53d6\u51fa\u6240\u6709key<br><a href=\"https:\/\/www.cnblogs.com\/144823836yj\/p\/14752548.html\">https:\/\/www.cnblogs.com\/144823836yj\/p\/14752548.html<\/a><\/p>\n\n\n\n<p>=END=<\/p>\n","protected":false},"excerpt":{"rendered":"<p>=Start= \u7f18\u7531\uff1a \u5728\u7528Hive SQL\u505a\u6570\u636e\u5206\u6790\u7684\u8fc7\u7a0b\u4e2d\u9047\u5230\u7684\u4e00\u4e2a\u60c5\u51b5\uff0c\u89c9\u5f97\u633a\u5178\u578b\u7684\uff0c\u540e\u9762\u53ef\u80fd\u4f1a\u9047\u5230\uff0c\u6240 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[14,23,7],"tags":[2383,2384,1261,181,2319,1262,2321,19],"class_list":["post-5798","post","type-post","status-publish","format-standard","hentry","category-database","category-knowledgebase-2","category-programing","tag-file_column","tag-file_schema","tag-hive","tag-json","tag-map_keys","tag-sql","tag-str_to_map","tag-tips"],"views":1696,"_links":{"self":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/5798","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/comments?post=5798"}],"version-history":[{"count":1,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/5798\/revisions"}],"predecessor-version":[{"id":5799,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/5798\/revisions\/5799"}],"wp:attachment":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/media?parent=5798"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/categories?post=5798"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/tags?post=5798"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}