{"id":417,"date":"2014-07-05T06:39:59","date_gmt":"2014-07-05T06:39:59","guid":{"rendered":"http:\/\/ixyzero.com\/blog\/?p=417"},"modified":"2014-07-05T06:39:59","modified_gmt":"2014-07-05T06:39:59","slug":"python%e5%86%99%e7%9a%84%e6%8a%93%e5%8f%96%e8%84%9a%e6%9c%acbak","status":"publish","type":"post","link":"https:\/\/ixyzero.com\/blog\/archives\/417.html","title":{"rendered":"Python\u5199\u7684\u6293\u53d6\u811a\u672c[bak]"},"content":{"rendered":"<p>\u4eceFreebuf\u4e0a\u627e\u5230\u7684\u4e00\u4e2a\u4ee3\u7801\u5199\u5f97\u975e\u5e38\u6e05\u6670\u7684\u6293\u53d6\u811a\u672c\uff0c\u6548\u679c\u4e5f\u662f\u6760\u6760\u7684\uff0c\u653e\u5728\u8fd9\u513f\u505a\u4e2a\u5907\u4efd\uff0c\u800c\u4e14\u65f6\u4e0d\u65f6\u7684\u9700\u8981\u7528\u4e00\u7528\uff08PS\uff1a\u6211\u8bb0\u5f97\u5f53\u65f6\u6539\u5199\u4e86\u4e00\u4e0b\uff0c\u53ef\u4ee5\u6293\u53d6\u5176\u4ed6\u677f\u5757\u7684\u6587\u7ae0\uff0c\u4f46\u662f\u4e0d\u77e5\u9053\u653e\u54ea\u53bb\u4e86\uff0c\u4ece\u6b64\u6211\u6df1\u77e5\u5907\u4efd\u7684\u91cd\u8981\u6027\uff01\uff09\uff0c\u5e76\u4e14\u53ef\u4ee5\u4ece\u8fd9\u4e2a\u811a\u672c\u4e2d\u901a\u6653\u6b64\u7c7b\u7684\u95ee\u9898\u7684\u7b80\u5355\u5904\u7406\u65b9\u6cd5\u3002<\/p>\n<pre class=\"lang:python decode:true \">#!\/usr\/bin\/env python\n#coding=utf-8\n\n'''\nFreebuf\u5de5\u5177\u6293\u53d6\u811a\u672c\n'''\n\nimport re,sys\nfrom urllib     import urlopen\nfrom Queue      import Queue\nfrom threading  import Thread\nfrom time       import strftime\n\n\nbaseUrl='http:\/\/www.freebuf.com\/tools\/page\/'\noutPut='FreebufToolsListX.html'\npageNum=32+1\nthreadNum=10\nurlList=[]\nthreadList=[]\nurlNum=0\n\ndef spiderIndex(url):\n    '''\n    spider function\n    '''\n    global urlNum\n    try:\n        res=urlopen(url)\n    except Exception,e:\n        print '[-] [%s] [Error] [%s]'\n    if res.getcode()==200:\n        html=res.read()\n        lines=html.split('\\n')\n        for line in lines:\n            rex=re.search(r'(&lt;dt&gt;&lt;a 
href=\")(http:\/\/www.freebuf.com\/tools\/\\d*.html)(\" target=\"_blank\"&gt;).*',line)\n            if rex!=None:\n                urlNum+=1\n                urlList.append(rex.group())\n                sys.stdout.write('\\r[*] [%s] [Working] [%s]'%(str(strftime('%X')) ,str(urlNum)))\n\nclass WorkThread(Thread):\n    '''\n    work thread\n    '''\n    def __init__(self,q):\n        Thread.__init__(self)\n        self.q=q\n    def run(self):\n        while True:\n            if self.q.empty()==True:\n                break\n            _url=baseUrl+str(self.q.get())\n            spiderIndex(_url)\n\ndef main():\n    '''\n    main function\n    '''\n    q=Queue(maxsize=0)\n    for i in xrange(1,pageNum,1):\n        q.put(i)\n\n    print '[+] [%s] [Start]'%strftime('%X')\n\n    spiderIndex('http:\/\/www.freebuf.com\/tools')\n\n    for i in xrange(threadNum):\n        t=WorkThread(q)\n        threadList.append(t)\n\n    for i in threadList:\n        i.start()\n\n    for i in threadList:\n        i.join()\n\n    f=open(outPut,'ab')\n    f.write('&lt;meta http-equiv=\"Content-Type\" content=\"text\/html; charset=utf-8\" \/&gt;\\n')\n    f.write('&lt;title&gt;Freebuf Tools List&lt;\/title&gt;\\n')\n    f.write('&lt;center&gt;&lt;h1&gt;&lt;b&gt;Freebuf Tools List&lt;\/b&gt;&lt;\/h1&gt;\\n'+'Time:'+str(strftime(\"%Y-%b-%d %X\"))+'  Count:'+str(len(urlList))+'&lt;\/center&gt;&lt;hr\/&gt;\\n&lt;h5&gt;\\n')\n    for line in urlList:\n        f.write(line+'&lt;\/br&gt;\\n')\n    f.close()\n\n    print '\\n[+] [%s] [End] [All Done!]'%strftime('%X')\n    print '[+] [%s] [Save As] [%s]'%(strftime('%X'),outPut)\n\nif __name__=='__main__':\n    main()<\/pre>\n<p>\u539f\u6587\u5730\u5740\uff1a<a href=\"http:\/\/www.freebuf.com\/tools\/25746.html\" target=\"_blank\">http:\/\/www.freebuf.com\/tools\/25746.html<\/a><\/p>\n<p>\u4ece\u6b64\u6211\u4e5f\u8ba2\u9605\u4e86\u8be5\u4f5c\u8005\u7684\u767e\u5ea6\u7a7a\u95f4\uff0c\u8fd8\u662f\u6709\u5f88\u591a\u4e0d\u9519\u7684\u5185\u5bb9\u7684\uff1a<\/p>\n<p><a 
href=\"http:\/\/hi.baidu.com\/l34rn\/\" target=\"_blank\">http:\/\/hi.baidu.com\/l34rn\/<\/a><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u4eceFreebuf\u4e0a\u627e\u5230\u7684\u4e00\u4e2a\u4ee3\u7801\u5199\u5f97\u975e\u5e38\u6e05\u6670\u7684\u6293\u53d6\u811a\u672c\uff0c\u6548\u679c\u4e5f\u662f\u6760\u6760\u7684\uff0c\u653e\u5728\u8fd9\u513f\u505a\u4e2a\u5907\u4efd\uff0c\u800c\u4e14\u65f6\u4e0d\u65f6\u7684\u9700\u8981\u7528\u4e00 [&hellip;]<\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[7,25,12],"tags":[8,200,124,125],"class_list":["post-417","post","type-post","status-publish","format-standard","hentry","category-programing","category-security","category-tools","tag-python","tag-queue","tag-thread","tag-urllib"],"views":2914,"_links":{"self":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/417","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/comments?post=417"}],"version-history":[{"count":0,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/posts\/417\/revisions"}],"wp:attachment":[{"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/media?parent=417"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/categories?post=417"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/ixyzero.com\/blog\/wp-json\/wp\/v2\/tags?post=417"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}