{"id":5741,"date":"2024-09-23T20:35:00","date_gmt":"2024-09-23T12:35:00","guid":{"rendered":"https:\/\/ixyzero.com\/blog\/?p=5741"},"modified":"2024-09-23T14:58:08","modified_gmt":"2024-09-23T06:58:08","slug":"hive-sql-%e4%b8%ad%e7%9a%84%e5%88%86%e5%8f%b7%e3%80%81%e4%b8%8b%e5%88%92%e7%ba%bf%e7%ad%89tips%e6%95%b4%e7%90%86","status":"publish","type":"post","link":"https:\/\/ixyzero.com\/blog\/archives\/5741.html","title":{"rendered":"Hive SQL \u4e2d\u7684\u5206\u53f7\u3001\u4e0b\u5212\u7ebf\u7b49tips\u6574\u7406"},"content":{"rendered":"\n<p>=Start=<\/p>\n\n\n\n<h4 class=\"wp-block-heading\">\u7f18\u7531\uff1a<\/h4>\n\n\n\n<p>\u7b80\u5355\u6574\u7406\u4e00\u4e0b\u8fd1\u671f\u5728\u8fdb\u884c Hive SQL \u67e5\u8be2\u65f6\u78b0\u5230\u7684\u4e00\u4e9b\u95ee\u9898\u548c\u7ecf\u9a8c\uff0c\u65b9\u4fbf\u540e\u9762\u6709\u9700\u8981\u7684\u65f6\u5019\u53c2\u8003\u3002<\/p>\n\n\n\n<h4 class=\"wp-block-heading\">\u6b63\u6587\uff1a<\/h4>\n\n\n\n<h5 class=\"wp-block-heading\">\u53c2\u8003\u89e3\u7b54\uff1a<\/h5>\n\n\n\n<h4 class=\"wp-block-heading\">1. Hive SQL \u5982\u4f55\u5bf9\u82f1\u6587\u5206\u53f7(;)\u505asplit\u5207\u5206\uff1f<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code>&gt; select split(f1,'\\073')&#91;0] from tab_name;\n\n-- \u4f7f\u7528 '\\073' \u6765\u4ee3\u66ff ';' \u8fd9\u79cd\u5199\u6cd5\u5c31OK\uff0c\u5426\u5219\u4f1a\u62a5\u8bed\u6cd5\u9519\u8bef\u3002\n\n-- \u9a8c\u8bc1\u4ee3\u7801\u7247\u6bb5\nwith t1 as (\nSELECT\n'1\\0733' as v1\n,'1 3' as v2\n,'123' as v3\n)\n\nSELECT\nv1,split(v1,'\\073') as result1\n,v2,split(v2,'\\073| ') as result2\n,v3,split(v3,'\\073| ') as result3\nFROM\nt1<\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code\"><code>,split(lower(mac_address),'\\073| ') as mac_addr_array --\u5bf9mac\u5730\u5740\u5b57\u7b26\u4e32\u4f7f\u7528\u7a7a\u683c\u548c\u5206\u53f7\u4f5c\u4e3a\u5206\u9694\u7b26\u8fdb\u884c\u5207\u5206\n\n,array_contains(mac_addr_array,user_mac) as it_asset -- \u5224\u65ad\u7279\u5b9a\u7684mac\u5730\u5740\u662f\u5426\u5728mac\u5730\u5740\u6570\u7ec4\u4e2d<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">2. Hive SQL\u7684like\u5982\u4f55\u5339\u914d\u4e0b\u5212\u7ebf\uff1f<\/h4>\n\n\n\n<p>SQL\u4e2d\u5982\u4f55\u4f7f\u7528like\u5339\u914d\u4e0b\u5212\u7ebf\u7684\u5185\u5bb9\uff1f\u56e0\u4e3a\u4e0b\u5212\u7ebf\u5728like\u4e2d\u4ee3\u8868\u4efb\u610f\u5355\u4e2a\u5b57\u7b26\uff0c\u5982\u679c\u9700\u8981\u5339\u914d\u4e0b\u5212\u7ebf\u5b57\u7b26\uff0c\u9700\u8981\u5bf9\u4e0b\u5212\u7ebf\u8fdb\u884c\u8f6c\u4e49\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>WHERE mycolumn LIKE '%\\_%' ESCAPE '\\'\nWHERE mycolumn LIKE '%\\_%'\n\nWHERE mycolumn LIKE '%#_%' ESCAPE '#'\n\n\u65b9\u6cd5\u4e00\uff1a\n\u4f7f\u7528 escape \u5bf9\u4e0b\u5212\u7ebf\u8fdb\u884c\u8f6c\u4e49\uff08\u9ed8\u8ba4\u662f\u53cd\u5212\u7ebf\uff09\n\n\u65b9\u6cd5\u4e8c\uff1a\n\u4f7f\u7528 instr\/locate\/rlike \u7b49\u51fd\u6570\u8fdb\u884c\u5224\u65ad\uff0c\u4e0d\u4f7f\u7528 like \u6765\u5224\u65ad<\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code\"><code>-- \u9a8c\u8bc1\u4ee3\u7801\u7247\u6bb5\n\nwith t1 as (\nSELECT\n'1_3' as underscore_var\n,'123' as no_underscore_var\n)\n\nSELECT\nunderscore_var, if(underscore_var like '%\\_%',1,0) as has1\n,no_underscore_var, if(no_underscore_var like '%\\_%',1,0) as has2\n,if(no_underscore_var like '%_%',1,0) as wrong_judge_method\nFROM\nt1<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">3. Hive\u4e2d \u5e03\u5c14\u7c7b\u578b(boolean) \u7684\u53d8\u91cf\u5982\u4f55\u505a\u6bd4\u8f83\uff1f<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code>-- \u7b80\u5355\u6765\u8bf4\u5c31\u662f\u53ef\u4ee5\u7528\u5e38\u89c4\u7684\u7b49\u53f7(=)\uff0c\u4e5f\u53ef\u4ee5\u4e0d\u6dfb\u52a0\u5224\u7b49\u7b26\u53f7\nCorrect syntax is:\nwhere boolean_col = True\nwhere boolean_col\nwhere NOT boolean_col\n\nTo check for NULL use:\nwhere boolean_col is NULL<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">4. Hive\u4e2d\u5982\u4f55\u5411array\/map\u7b49\u590d\u6742\u7c7b\u578b\u5b57\u6bb5\u4e2d\u6dfb\u52a0\u7a7a\u503c\uff1f<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code>-- \u6682\u672a\u5b9e\u9645\u6d4b\u8bd5\n\nPrepare:\nCREATE TABLE IF NOT EXISTS tmp.test_table (\n col1 STRING,\n col2 MAP&lt;STRING, STRING&gt;,\n col3 ARRAY&lt;BIGINT&gt;\n)\nPARTITIONED BY ( ds STRING )\n;\n\nStep1: Add brickhouse jar and create temporary functions to cast array and map.\n\n&gt; ADD JAR s3a:\/\/airbnb-datainfra-dependencies-internal-only\/teams\/datainfra\/projects\/hive_aux_jars\/brickhouse-0.7.1.jar;\n&gt; CREATE TEMPORARY FUNCTION CAST_ARRAY AS 'brickhouse.udf.collect.CastArrayUDF';\n&gt; CREATE TEMPORARY FUNCTION CAST_MAP AS 'brickhouse.udf.collect.CastMapUDF';\n\nStep 2: Insert the data:\n\n&gt; INSERT OVERWRITE TABLE tmp.test_table PARTITION(ds='2020\u201301\u201301')\n&gt; SELECT \n 'random_text' col1\n , CAST_MAP(map('', ''), 'map&lt;string,string&gt;') col2\n , CAST_ARRAY(array(), 'bigint') col3\n\nStep 3: Check the results:\n&gt; SELECT * FROM tmp.test_table;<\/code><\/pre>\n\n\n\n<h5 class=\"wp-block-heading\">\u53c2\u8003\u94fe\u63a5\uff1a<\/h5>\n\n\n\n<p>split string that includes semicolons in Hive<br><a href=\"https:\/\/stackoverflow.com\/questions\/17212511\/split-string-that-includes-semicolons-in-hive\">https:\/\/stackoverflow.com\/questions\/17212511\/split-string-that-includes-semicolons-in-hive<\/a><\/p>\n\n\n\n<p>Why does using an Underscore character in a LIKE filter give me all the results?<br><a href=\"https:\/\/stackoverflow.com\/questions\/19588455\/why-does-using-an-underscore-character-in-a-like-filter-give-me-all-the-results\">https:\/\/stackoverflow.com\/questions\/19588455\/why-does-using-an-underscore-character-in-a-like-filter-give-me-all-the-results<\/a><\/p>\n\n\n\n<p>\u3010Hive\u3011like + \u8f6c\u4e49\u5b57\u7b26<br><a href=\"https:\/\/blog.csdn.net\/qq_34105362\/article\/details\/80415526\">https:\/\/blog.csdn.net\/qq_34105362\/article\/details\/80415526<\/a><\/p>\n\n\n\n<p>HIVE Boolean Filter<br><a href=\"https:\/\/stackoverflow.com\/questions\/68313045\/hive-boolean-filter\">https:\/\/stackoverflow.com\/questions\/68313045\/hive-boolean-filter<\/a><\/p>\n\n\n\n<p>Insert NULL into Hive complex columns like ARRAY, MAP etc.<br><a href=\"https:\/\/medium.com\/@rajnishkumargarg\/insert-null-into-hive-complex-columns-like-array-map-etc-a76e320d3e7e\">https:\/\/medium.com\/@rajnishkumargarg\/insert-null-into-hive-complex-columns-like-array-map-etc-a76e320d3e7e<\/a><br><a href=\"https:\/\/github.com\/jeromebanks\/brickhouse\/blob\/master\/src\/main\/java\/brickhouse\/udf\/collect\/CastMapUDF.java\">https:\/\/github.com\/jeromebanks\/brickhouse\/blob\/master\/src\/main\/java\/brickhouse\/udf\/collect\/CastMapUDF.java<\/a><\/p>\n\n\n\n<p>=END=<\/p>\n","protected":false},"excerpt":{"rendered":"<p>=Start= \u7f18\u7531\uff1a \u7b80\u5355\u6574\u7406\u4e00\u4e0b\u8fd1\u671f\u5728\u8fdb\u884c Hive SQL \u67e5\u8be2\u65f6\u78b0\u5230\u7684\u4e00\u4e9b\u95ee\u9898\u548c\u7ecf\u9a8c\uff0c\u65b9\u4fbf\u540e\u9762\u6709\u9700\u8981\u7684 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[14,23],"tags":[2337,2338,2339,443,1261,483,357,1262],"class_list":["post-5741","post","type-post","status-publish","format-standard","hentry","category-database","category-knowledgebase-2","tag-2337","tag-brickhouse","tag-castmapudf","tag-escape","tag-hive","tag-like","tag-split","tag-sql"],"views":1837,"_links":{"self":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/5741","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/comments?post=5741"}],"version-history":[{"count":1,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/5741\/revisions"}],"predecessor-version":[{"id":5742,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/5741\/revisions\/5742"}],"wp:attachment":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/media?parent=5741"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/categories?post=5741"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/tags?post=5741"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}