{"id":4248,"date":"2019-01-13T08:27:50","date_gmt":"2019-01-13T00:27:50","guid":{"rendered":"https:\/\/ixyzero.com\/blog\/?p=4248"},"modified":"2019-01-13T08:27:50","modified_gmt":"2019-01-13T00:27:50","slug":"hive-sql%e5%ad%a6%e4%b9%a0%e6%95%b4%e7%90%86_3","status":"publish","type":"post","link":"https:\/\/ixyzero.com\/blog\/archives\/4248.html","title":{"rendered":"Hive SQL\u5b66\u4e60\u6574\u7406_3"},"content":{"rendered":"<p>=Start=<\/p>\n<h4 id=\"id-\u6a21\u677f-\u7f18\u7531\uff1a\">\u7f18\u7531\uff1a<\/h4>\n<p>\u7ee7\u7eed\u6574\u7406\u6700\u8fd1\u5b66\u5230\u6216\u662f\u7528\u5230\u7684Hive SQL\u77e5\u8bc6\uff0c\u65b9\u4fbf\u4ee5\u540e\u53c2\u8003\u3002<\/p>\n<h4 id=\"id-\u6a21\u677f-\u6b63\u6587\uff1a\">\u6b63\u6587\uff1a<\/h4>\n<h5 id=\"id-\u6a21\u677f-\u53c2\u8003\u89e3\u7b54\uff1a\">\u53c2\u8003\u89e3\u7b54\uff1a<\/h5>\n<h6><span style=\"color: #ff0000;\"><strong>1\u3001Hive SQL\u4e2d\u5982\u4f55\u5224\u65ad\u67d0\u4e00\u5217\u4e2d\u662f\u5426\u5305\u542b\u67d0\u4e2a\u5173\u952e\u5b57\uff1f<\/strong><\/span><\/h6>\n<p>\u601d\u8def\u5927\u6982\u67092\u79cd\uff08\u5b57\u7b26\u4e32\u67e5\u627e\u3001\u6a21\u7cca\u5339\u914d\/\u6b63\u5219\u5339\u914d\uff09\uff0c\u5177\u4f53\u7684\u5b9e\u73b0\u65b9\u6cd5\u5c31\u591a\u4e00\u4e9b\uff0c\u4ee5\u4e0b\u662f\u6211\u6574\u7406\u7684\u51e0\u79cd\uff1a<\/p>\n<pre class=\"lang:default decode:true\">-- \u4f7f\u7528like\r\nwhere col_name like '%key_word%'\r\n\r\n-- \u4f7f\u7528instr\r\nwhere instr(col_name,'key_word') is not null and instr(col_name,'key_word') &gt; 0\r\n\r\n-- \u4f7f\u7528locate\r\nwhere locate('key_word',col_name) &gt; 0\r\n\r\n\/*\r\ninstr(string str, string substr) -- \u8fd4\u56desubstr\u5728str\u4e2d\u7b2c\u4e00\u6b21\u51fa\u73b0\u7684\u4f4d\u7f6e\u3002\u82e5\u4efb\u4f55\u53c2\u6570\u4e3anull\u8fd4\u56denull\uff0c\u82e5substr\u4e0d\u5728str\u4e2d\u8fd4\u56de0\u3002str\u4e2d\u7b2c\u4e00\u4e2a\u5b57\u7b26\u7684\u4f4d\u7f6e\u4e3a1\r\nlocate(string substr, string str[, int pos]) -- \u8fd4\u56de substr \u5728 str \u7684\u4f4d\u7f6epos\u540e\u7b2c\u4e00\u6b21\u51fa\u73b0\u7684\u4f4d\u7f6e\r\nfind_in_set(string str, string strList) -- \u8fd4\u56destr\u5728strList\u4e2d\u7b2c\u4e00\u6b21\u51fa\u73b0\u7684\u4f4d\u7f6e\uff0cstrList\u4e3a\u7528\u9017\u53f7\u5206\u9694\u7684\u5b57\u7b26\u4e32\uff0c\u5982\u679cstr\u5305\u542b\u9017\u53f7\u5219\u8fd4\u56de0\uff0c\u82e5\u4efb\u4f55\u53c2\u6570\u4e3anull\uff0c\u8fd4\u56denull\u3002\u5982\uff1a find_in_set('ab', 'abc,b,ab,c,def') \u8fd4\u56de3\r\n*\/<\/pre>\n<p>&amp;<\/p>\n<pre class=\"lang:default decode:true\">select locate('a','abcd'), locate('b', 'abcd'), locate('f', 'abcd')\r\n-- 1\t2\t0\r\n\r\nselect instr('abcd','a'), instr('abcd',null), instr('abcd','f')\r\n-- 1\tNULL\t0\r\n\r\nselect find_in_set('ab','ef,ab,de'), find_in_set('at','ef,ab,de')\r\n-- 2\t0\r\n\r\nselect find_in_set('ab','ef,ab,de'), find_in_set('at','ef,ab,de'), find_in_set('at','not_comma-delimited_string')\r\n-- 2\t0\t0\r\n\r\nselect find_in_set('com','not_comma-delimited_string')\r\n-- 0\r\n\r\n\/*\r\ninstr(string str, string substr) -- \u8fd4\u56desubstr\u5728str\u4e2d\u7b2c\u4e00\u6b21\u51fa\u73b0\u7684\u4f4d\u7f6e\u3002\u82e5\u4efb\u4f55\u53c2\u6570\u4e3anull\u8fd4\u56denull\uff0c\u82e5substr\u4e0d\u5728str\u4e2d\u8fd4\u56de0\u3002str\u4e2d\u7b2c\u4e00\u4e2a\u5b57\u7b26\u7684\u4f4d\u7f6e\u4e3a1\r\n\r\nlocate(string substr, string str[, int pos]) -- \u8fd4\u56de substr \u5728 str \u7684\u4f4d\u7f6epos\u540e\u7b2c\u4e00\u6b21\u51fa\u73b0\u7684\u4f4d\u7f6e\r\n\r\nfind_in_set(string str, string strList) -- \u8fd4\u56destr\u5728strList\u4e2d\u7b2c\u4e00\u6b21\u51fa\u73b0\u7684\u4f4d\u7f6e\uff0cstrList\u4e3a\u7528\u9017\u53f7\u5206\u9694\u7684\u5b57\u7b26\u4e32\uff0c\u5982\u679cstr\u5305\u542b\u9017\u53f7\u5219\u8fd4\u56de0\uff0c\u82e5\u4efb\u4f55\u53c2\u6570\u4e3anull\uff0c\u8fd4\u56denull\u3002\u5982\uff1a find_in_set('ab', 'abc,b,ab,c,def') \u8fd4\u56de3\r\n*\/<\/pre>\n<h6><strong>2\u3001\u6761\u4ef6\u5224\u65adCase When Else End\u7684\u7528\u6cd5<\/strong><\/h6>\n<pre class=\"lang:default decode:true\">-- update\u64cd\u4f5c\uff1a\r\nupdate table_name\r\nset \u5b57\u6bb51=case\r\nwhen \u6761\u4ef61 then \u503c1\r\nwhen \u6761\u4ef62 then \u503c2\r\nelse \u503c3\r\nend\r\nwhere ...\r\n\r\n-- select\u64cd\u4f5c\uff1a\r\nselect \u5b57\u6bb51, \u5b57\u6bb52,\r\ncase \u5b57\u6bb53\r\nwhen \u503c1 then \u65b0\u503c\r\nwhen \u503c2 then \u65b0\u503c\r\nend as \u91cd\u65b0\u547d\u540d\u5b57\u6bb53\u7684\u540d\u5b57\r\nfrom table_name\r\nwhere ...\r\norder by ...\r\n<\/pre>\n<h6><span style=\"color: #ff0000;\"><strong>3\u3001\u7528\u6b63\u5219\u8fdb\u884c\u5b57\u7b26\u4e32\u63d0\u53d6\u3001\u66ff\u6362<\/strong><\/span><\/h6>\n<pre class=\"lang:default decode:true\">\/*\r\nregexp_extract(string subject, string pattern, int index)\r\n\r\n\u4f7f\u7528pattern\u4ece\u7ed9\u5b9a\u5b57\u7b26\u4e32\u4e2d\u63d0\u53d6\u5b57\u7b26\u4e32\u3002\u5982\uff1a regexp_extract('foothebar', 'foo(.*?)(bar)', 2) \u8fd4\u56de'bar'\u3002\u6709\u65f6\u9700\u8981\u4f7f\u7528\u9884\u5b9a\u4e49\u7684\u5b57\u7b26\u7c7b\uff1a\u4f7f\u7528'\\s' \u505a\u4e3a\u7b2c\u4e8c\u4e2a\u53c2\u6570\u5c06\u5339\u914ds\uff0c'\\\\s'\u5339\u914d\u7a7a\u683c\u7b49\u3002\u53c2\u6570index\u662fJava\u6b63\u5219\u5339\u914d\u5668\u65b9\u6cd5group()\u65b9\u6cd5\u4e2d\u7684\u7d22\u5f15\u3002\r\n\u5728\u6709\u4e9b\u60c5\u51b5\u4e0b\u8981\u4f7f\u7528\u8f6c\u4e49\u5b57\u7b26\uff0c\u9700\u8981\u7b26\u5408java\u6b63\u5219\u8868\u8fbe\u5f0f\u7684\u89c4\u5219\u3002\r\n*\/\r\nselect regexp_extract('foothebar', 'foo(.*?)(bar)', 1);\r\n-- the\r\nselect regexp_extract('foothebar', 'foo(.*?)(bar)', 2);\r\n-- bar\r\nselect regexp_extract('foothebar', 'foo(.*?)(bar)', 0);\r\n-- foothebar\r\n\r\n\r\n\/*\r\nregexp_replace(string INITIAL_STRING, string PATTERN, string REPLACEMENT)\r\n\r\n\u4f7f\u7528 REPLACEMENT \u66ff\u6362\u5b57\u7b26\u4e32 INITIAL_STRING \u4e2d\u5339\u914d PATTERN \u7684\u5b50\u4e32\u3002\u6ce8\u610f\uff0c\u5728\u6709\u4e9b\u60c5\u51b5\u4e0b\u8981\u4f7f\u7528\u8f6c\u4e49\u5b57\u7b26\u3002\r\n*\/\r\nselect regexp_replace(\"foobar\", \"oo|ar\", \"\");\r\n-- 'fb'\r\n<\/pre>\n<h6><span style=\"color: #ff0000;\"><strong>4\u3001Hive SQL\u4e2d\u7684\u5b50\u67e5\u8be2<\/strong><\/span><\/h6>\n<pre class=\"lang:default decode:true\">\/*\r\nFROM\u5b50\u53e5\u4e2d\u7684\u5b50\u67e5\u8be2(Subqueries in the FROM Clause)\r\n\r\nSELECT ... FROM (subquery) name ...\r\nSELECT ... FROM (subquery) AS name ...   (Note: Only valid starting with Hive 0.13.0)\r\n*\/\r\nSELECT col\r\nFROM (\r\n  SELECT a+b AS col\r\n  FROM t1\r\n) t2\r\n\r\n\r\n\/*\r\nWHERE\u5b50\u53e5\u4e2d\u7684\u5b50\u67e5\u8be2(Subqueries in the WHERE Clause)\r\n\r\n*\/\r\nSELECT *\r\nFROM A\r\nWHERE A.a IN (SELECT foo FROM B);\r\n\r\nSELECT A\r\nFROM T1\r\nWHERE EXISTS (SELECT B FROM T2 WHERE T1.X = T2.Y)\r\n<\/pre>\n<h6><strong>5\u3001\u6742\u9879<\/strong><\/h6>\n<pre class=\"lang:default decode:true \">\/*\r\nreplace(string A, string OLD, string NEW)\r\n\r\n\u5e38\u89c4\u7684\u5b57\u7b26\u4e32\u66ff\u6362\uff0c\u4f7f\u7528 NEW \u66ff\u6362\u5b57\u7b26\u4e32 A \u4e2d\u7684 OLD \u3002\r\n*\/\r\nselect replace(\"ababab\", \"abab\", \"Z\");\r\n-- 'Zab'\r\n\r\n\r\n\/*\r\nlevenshtein(string A, string B)\r\n\r\n\u8ba1\u7b972\u4e2a\u5b57\u7b26\u4e4b\u95f4\u7684 levenshtein \u8ddd\u79bb\r\n*\/\r\nselect levenshtein('kitten', 'sitting');\r\n-- 3\r\n\r\n\r\n\/*\r\n\u8bed\u6cd5: parse_url(string urlString, string partToExtract [, stringkeyToExtract])\r\n\u8fd4\u56de\u503c: string\r\n\u8bf4\u660e\uff1a\u8fd4\u56deURL\u4e2d\u6307\u5b9a\u7684\u90e8\u5206\u3002partToExtract\u7684\u6709\u6548\u503c\u4e3a: HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, \u548c USERINFO \u3002\r\n*\/\r\nselect parse_url('http:\/\/facebook.com\/path1\/p.php?k1=v1&amp;k2=v2#Ref1', 'HOST')\r\n-- 'facebook.com'\r\n\r\nselect parse_url('http:\/\/facebook.com\/path1\/p.php?k1=v1&amp;k2=v2#Ref1', 'QUERY', 'k1')\r\n-- 'v1'\r\n<\/pre>\n<p>&nbsp;<\/p>\n<h6><a href=\"https:\/\/cwiki.apache.org\/confluence\/display\/Hive\/LanguageManual+UDF\">\u5b98\u65b9 LanguageManual UDF<\/a><\/h6>\n<p>`<br \/>\n\u5185\u7f6e\u7684\u64cd\u4f5c\u7b26<br \/>\n\u5185\u7f6e\u7684\u51fd\u6570<br \/>\n\u5185\u7f6e\u7684\u805a\u5408\u51fd\u6570(UDAF)<br \/>\n\u5185\u7f6e\u7684Table-Generating\u51fd\u6570(UDTF)<br \/>\n\u9488\u5bf9f(column)\u7684\u5206\u7ec4\u548c\u6392\u5e8f<br \/>\nUDF\u5185\u90e8<br \/>\n\u521b\u5efa\u81ea\u5b9a\u4e49\u7684UDF<br \/>\n`<\/p>\n<p>&nbsp;<\/p>\n<h5 id=\"id-\u6a21\u677f-\u53c2\u8003\u94fe\u63a5\uff1a\">\u53c2\u8003\u94fe\u63a5\uff1a<\/h5>\n<ul>\n<li><a href=\"https:\/\/stackoverflow.com\/questions\/38144215\/hiveql-string-contains-equivalent-in-hiveql-udf\">HiveQL &#8211; String contains equivalent in hiveql UDF?<\/a><\/li>\n<li><a href=\"https:\/\/community.hortonworks.com\/questions\/24629\/find-a-keyword-in-a-column-in-hive-table.html\">Find a keyword in a column in Hive table<\/a><\/li>\n<li><a href=\"https:\/\/blog.csdn.net\/qq_34105362\/article\/details\/80540164\">\u3010Hive\u3011join\u4e2d\u7684\u6a21\u7cca\u5339\u914d\uff08locate\uff09<\/a><\/li>\n<li><a href=\"https:\/\/blog.csdn.net\/ruidongliu\/article\/details\/11735507\">SQL\u4e2d\u7684Case When Else End\u7684\u7528\u6cd5<\/a><\/li>\n<li><a href=\"https:\/\/cwiki.apache.org\/confluence\/display\/Hive\/LanguageManual+SubQueries\">\u5b98\u65b9 LanguageManual SubQueries<\/a><\/li>\n<li><a href=\"https:\/\/cwiki.apache.org\/confluence\/display\/Hive\/Subqueries+in+SELECT\">Subqueries in SELECT<\/a><\/li>\n<\/ul>\n<p>=END=<\/p>\n","protected":false},"excerpt":{"rendered":"<p>=Start= \u7f18\u7531\uff1a \u7ee7\u7eed\u6574\u7406\u6700\u8fd1\u5b66\u5230\u6216\u662f\u7528\u5230\u7684Hive SQL\u77e5\u8bc6\uff0c\u65b9\u4fbf\u4ee5\u540e\u53c2\u8003\u3002 \u6b63\u6587\uff1a \u53c2\u8003\u89e3\u7b54\uff1a 1 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[14,23,12],"tags":[1284,1261,1262,1285],"class_list":["post-4248","post","type-post","status-publish","format-standard","hentry","category-database","category-knowledgebase-2","category-tools","tag-case","tag-hive","tag-sql","tag-subquery"],"views":15056,"_links":{"self":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/4248","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/comments?post=4248"}],"version-history":[{"count":1,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/4248\/revisions"}],"predecessor-version":[{"id":4249,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/4248\/revisions\/4249"}],"wp:attachment":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/media?parent=4248"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/categories?post=4248"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/tags?post=4248"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}