Python知识积累_0

Python的字符串split函数

>>> help(str.split)
Help on method_descriptor:

split(...)
    S.split([sep [,maxsplit]]) -> list of strings

Return a list of the words in the string S, using sep as the delimiter string.  If maxsplit is given, at most maxsplit splits are done. If sep is not specified or is None, any whitespace string is a separator and empty strings are removed from the result.

测试代码片段：

# Demonstrates how str.split(' ') differs from the no-argument str.split().
testStr = 'a b  c   d     efg'
print(testStr)

# An explicit single-space separator keeps the empty strings that sit
# between consecutive spaces.
res = testStr.split(' ')
for idx, item in enumerate(res):
    print("%s %s" % (idx, item))

# Without a separator, runs of whitespace act as one delimiter and empty
# strings are removed from the result.
res2 = testStr.split()
for idx, item in enumerate(res2):
    print("%s %s" % (idx, item))

小细节：如果你的分隔符是一个或者多个空格(空白字符)，那么你就不必给出任何参数，即直接使用str.split() 即可！需要注意的是，这种分隔方法会把连续的空白当作一个分隔符并丢弃空字符串，也就是说如果你要对多行文本逐行split()，假如你期望每行都被分隔为10列，但是如果某一列为空，那么你可能只会得到9列！解决办法是显式给出分隔符，例如 line.split(' ') 或 line.split('\t')：这样空的列会以空字符串的形式保留下来（参见上面测试代码中 res 的输出）。

Python用多个分隔符拆分字符串

很多时候我们需要用多个分隔符拆分字符串，例如拆分标签时，既要用中文的分号，也要用到英文的分号。但是字符串自带的split()函数无法达到这样的功能，所以需要自己写一个：

def tsplit(string, delimiters):
    """Behave like str.split, but split on several delimiters.

    The string is split on the first delimiter, every resulting piece is
    split on the second delimiter, and so on.  Empty pieces are kept, just
    as str.split(sep) keeps them.

    Args:
        string: The text to split.
        delimiters: An iterable of delimiter strings.  A plain string is
            treated as a collection of single-character delimiters.

    Returns:
        A list with the pieces in their original order.

    Raises:
        ValueError: If one of the delimiters is the empty string
            (raised by str.split).
    """
    pieces = [string]
    for delimiter in tuple(delimiters):
        # Build a fresh list per delimiter rather than popping from and
        # inserting into the list that is being iterated.
        pieces = [part for piece in pieces for part in piece.split(delimiter)]
    return pieces
####
# Split on three different ASCII delimiters in a single call.
s = 'thing1,thing2/thing3-thing4'
print tsplit(s, (',', '/', '-'))	# ['thing1', 'thing2', 'thing3', 'thing4']
# Both the ASCII comma and the full-width comma are delimiters here; '-' is
# not in the tuple, so 'yoyo-checknow' stays in one piece.  Python 2 shows the
# UTF-8 bytes of the non-ASCII piece as escapes when printing the list.
print tsplit('你好，Python,yoyo-checknow. Justdoit!', (',', '，', '.'))	# ['\xe4\xbd\xa0\xe5\xa5\xbd', 'Python', 'yoyo-checknow', ' Justdoit!']

Python打印字典

之前在使用Python的字典进行去重操作时，因为最先学习的是别人的方法（最后使用了sorted函数进行了个排序操作）：

# Order the (key, value) pairs of self.dic by value, largest first, and
# print the first int(self.count) of them.
soredic = sorted(self.dic.items(), key=lambda d: d[1], reverse=True)
counts = 0
for item in soredic:
    if counts == int(self.count):
        break
    # A tab separates the key column from the counter column.
    print("IP: %s\tTotal Times: %s" % (item[0], item[1]))
    counts = counts + 1

但是，当字典中存放的内容较多时，使用sorted函数会消耗时间，因为提前排序是没什么实际效用的，但是当时我还不知道该怎么打印字典，后来搜索了之后才明白：

如果直接对dict类型的数据进行循环，并把每一项解包成 key, value 两个变量，则会报错：

>>> print dict_str
{'blue': '[email protected]', 'allen': '[email protected]', 'sophia': '[email protected]', 'ceen': '[email protected]'}

>>> for key, value in dict_str:
...     print key, value
...
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: too many values to unpack

因为直接迭代dict得到的只是key（这里是字符串），无法解包成key和value两个变量，所以解包失败；需要使用dict.items()才能得到(key, value)对。如果只需要key，可以直接迭代dict或使用dict.keys()。

对字典数据进行排序(对字典进行排序需要使用sorted()方法)
# Sample mapping used by the two sorting examples below.
dict_str = {'blue': '[email protected]',
            'allen': '[email protected]',
            'sophia': '[email protected]',
            'ceen': '[email protected]'}
print(dict_str)
# Sort the (key, value) pairs by key.
print(sorted(dict_str.items(), key=lambda d: d[0]))
# Sort the (key, value) pairs by value.
print(sorted(dict_str.items(), key=lambda d: d[1]))

在Python 中打印字典，如果字典里面又有字典，则输出不好看。

方法一：使用内置的pprint

方法二：自己写一个函数

>>> d = {1:{'who':{'name':'Michael','age':20,'job':{'P':'90-91','G':'91-93'}}},2:{'who':{'name':'Mary','age':18,'job':{'P':'90-93','G':'93-95'}}}}
>>> d
{1: {'who': {'job': {'P': '90-91', 'G': '91-93'}, 'age': 20, 'name': 'Michael'}}, 2: {'who': {'job': {'P': '90-93', 'G': '93-95'}, 'age': 18, 'name': 'Mary'}}}

def print_dict(d, n=0):
    """Print a (possibly nested) dict as an indented tree.

    Every entry is printed as ``key : value``.  A value that is itself a
    dict is opened with ``key : {``, printed recursively one level deeper
    and closed with ``}``.

    Args:
        d: The dict to print.
        n: Current nesting depth; external callers leave it at 0.

    Returns:
        None.  The output is written to standard output.
    """
    # Top-level entries start with a single space, nested entries with one
    # tab per nesting level.
    indent = '\t' * n if n else ' '
    for k, v in d.items():
        if isinstance(v, dict):
            print("%s%s : {" % (indent, k))
            print_dict(v, n + 1)
        else:
            print("%s%s : %s" % (indent, k, v))
    if n != 0:
        # Close the brace opened by the caller, which sits one level out.
        print('\t' * (n - 1) + '}')

>>> print_dict(d)
 1 : {
	who : {
		job : {
			P : 90-91
			G : 91-93
		}
		age : 20
		name : Michael
	}
}
 2 : {
	who : {
		job : {
			P : 90-93
			G : 93-95
		}
		age : 18
		name : Mary
	}
}

参考：http://ipython.iteye.com/blog/1962679

Python的KeyError异常

如果不知道dict中是否有key的值，那么最好用dict.get(key) 方法取值

如果用dict[key] 的方式进行读取则会报KeyError异常，

dict.get方法主要是提供一个取不到对应key的value就返回默认值的功能，而dict[key]实际上是调用了__getitem__方法

说明（ help(dict.get) 或 dir(dict.get) ）：

D.get(k[, d]) -> D[k] if k in D, else d. d defaults to None.

Python根据主机名字获得所有ip地址

import sys, socket
# Resolve the host name and list every address entry returned for it.
result = socket.getaddrinfo('www.baidu.com', None, 0, socket.SOCK_STREAM)
for idx, item in enumerate(result):
    # Element 4 of each getaddrinfo() entry is the socket address.
    print("%-2d: %s" % (idx, item[4]))

显示Python代码执行时间：

from time import strftime

# Timestamp printed before the measured code runs, followed by a blank line.
print('[+] [%s] [Start]\n' % strftime('%X'))

# code

# Timestamp printed after the measured code has finished, preceded by a
# blank line.
print('\n[+] [%s] [End]' % strftime('%X'))

随机密码生成：

import random
class Dictor():
    """Endless iterator that yields random words built from ``CSet``.

    Each word has a random length between the two bounds given to the
    constructor (both inclusive).  The iterator never raises StopIteration,
    so a ``for`` loop over it runs until the caller breaks out.
    """

    # Alphabet the words are drawn from: lowercase letters and digits.
    CSet='abcdefghijklmnopqrstuvwxyz0123456789'

    def __init__(self, minlen, maxlen):
        """Store the length bounds; they may be passed in either order."""
        if maxlen > minlen:
            self.__minlen = minlen
            self.__maxlen = maxlen
        else:
            self.__minlen = maxlen
            self.__maxlen = minlen

    def __iter__(self):
        """Return the object itself; it is its own iterator."""
        return self

    def next(self):
        """Return one random word whose length lies within the bounds."""
        length = random.randrange(self.__minlen, self.__maxlen + 1)
        return ''.join(random.choice(Dictor.CSet) for _ in range(length))

    # Python 3 looks up __next__ instead of next; expose both names.
    __next__ = next

if __name__ == '__main__':
    # Dictor never raises StopIteration, so this keeps printing random
    # 3-character words until the process is interrupted.
    generator = Dictor(3, 3)
    for word in generator:
        print(str(word))
随机密码生成2：

import string, random
def makePassword(minlength = 5, maxlength = 25):
    """Return a random password made of ASCII letters and digits.

    Args:
        minlength: Smallest allowed password length (inclusive).
        maxlength: Largest allowed password length (inclusive).

    Returns:
        A string whose length lies between minlength and maxlength.

    Raises:
        ValueError: If minlength is greater than maxlength (raised by
            randint).
    """
    # SystemRandom draws from the operating system's entropy source, so the
    # result cannot be reproduced by seeding the module-level generator.
    rng = random.SystemRandom()
    length = rng.randint(minlength, maxlength)
    letters = string.ascii_letters + string.digits
    return ''.join(rng.choice(letters) for _ in range(length))
print(makePassword())

# Draw 5 distinct numbers from 1-35 and 2 distinct numbers from 1-12, sort
# each group ascending and print them as one list.
front_numbers = sorted(random.sample(range(1, 36), 5))
back_numbers = sorted(random.sample(range(1, 13), 2))
print(front_numbers + back_numbers)

测试URL的有效性：

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib2
good_list = []
bad_list = []

def get_responses(url):
    """Fetch ``url`` and record it in ``good_list`` when it answers HTTP 200.

    Args:
        url: The address to probe.  Surrounding whitespace, such as the
            newline kept when lines are read from a file, is removed first.

    Returns:
        0 when the request raises ``urllib2.URLError``, otherwise None.
    """
    url = url.strip()
    try:
        resp = urllib2.urlopen(url, timeout=2)
    except urllib2.URLError as e:
        print(e)
        return 0
    try:
        retcode = resp.getcode()
    finally:
        # Release the connection whether or not the status could be read.
        resp.close()
    if retcode == 200:
        good_list.append(url)

# Pass every line of urllist.txt to get_responses(), then report how many
# URLs ended up in good_list.  The with-block closes the file even if a
# check raises.
with open('urllist.txt', 'r') as urllist:
    for item in urllist:
        get_responses(item)
print("Good URLs:%d 个" % (len(good_list)))


# !/usr/bin/python
# -*- coding: utf-8 -*-
import urllib2
good_list = []
bad_list = []

def get_responses(url):
    """Fetch ``url`` and file it under ``good_list`` or ``bad_list``.

    A URL goes to ``good_list`` when the response status is 200.  It goes
    to ``bad_list`` when the status is anything else or when the request
    raises ``urllib2.URLError``.

    Args:
        url: The address to probe.  Surrounding whitespace, such as the
            newline kept when lines are read from a file, is removed first.

    Returns:
        0 when the request raises ``urllib2.URLError``, otherwise None.
    """
    url = url.strip()
    try:
        resp = urllib2.urlopen(url, timeout=3)
    except urllib2.URLError as e:
        print(e)
        bad_list.append(url)
        return 0
    try:
        retcode = resp.getcode()
    finally:
        # Release the connection whether or not the status could be read.
        resp.close()
    if retcode == 200:
        good_list.append(url)
    else:
        bad_list.append(url)

# Pass every line of urllist.txt to get_responses(), then print the totals
# collected in good_list and bad_list.  The with-block closes the file even
# if a check raises.
with open('urllist.txt', 'r') as urllist:
    for item in urllist:
        get_responses(item)
total = len(good_list) + len(bad_list)
print("Total URLs: %d, Good URLs:%d, Bad URLs: %d." % (total, len(good_list), len(bad_list)))

# Write the good URLs to goodurl.txt, one per line.  Any line ending an
# entry already carries is dropped first so each line ends in exactly one
# newline.
with open('goodurl.txt', 'w+') as goodurl:
    for item in good_list:
        goodurl.write(item.rstrip('\r\n') + '\n')

print("The mission is done, Please check the goodurl.txt file")

给目录中的文件按创建时间排序

def compare(x, y):
    """Compare two entries of DIR by the st_ctime of their os.stat result.

    Args:
        x: Name of the first entry, relative to DIR.
        y: Name of the second entry, relative to DIR.

    Returns:
        -1 if x has the smaller st_ctime, 1 if it has the larger one and 0
        if both are equal.

    NOTE(review): per the os.stat documentation st_ctime is the time of the
    last metadata change on Unix and the creation time on Windows; confirm
    that this is the intended ordering.
    """
    ctime_x = os.stat(DIR + "/" + x).st_ctime
    ctime_y = os.stat(DIR + "/" + y).st_ctime
    # True and False subtract as 1 and 0, giving -1, 0 or 1.
    return (ctime_x > ctime_y) - (ctime_x < ctime_y)

import os

DIR = "/home/zero/workspace"
iterms = os.listdir(DIR)
# Order the entries by st_ctime, smallest first.  A key function stats each
# entry once and works on both Python 2 and Python 3, unlike a positional
# comparison function.
iterms.sort(key=lambda name: os.stat(DIR + "/" + name).st_ctime)
for iterm in iterms:
    print(iterm)

列表内容过滤

import os
# Keep only the entries of the current directory whose name ends in ".txt".
items = os.listdir(".")
newlist = [names for names in items if names.endswith(".txt")]
print(newlist)

文件内容读取（取前1000行）

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from itertools import islice

# Collect the second whitespace-separated column of every line of each
# input file.  The with-blocks close the files as soon as they are read.
with open('sourceIP.txt') as src:
    set1 = [line.split()[1] for line in src]
with open('sourceIP2.txt') as src:
    set2 = [line.split()[1] for line in src]

print(list(set(set1).difference(set2)))  # in set1 not in set2

# Alternative: set(set1).intersection(set2) gives the values that are in
# both set1 and set2.

# Read only the first 1000 lines instead of loading the whole file.
with open('sourceIP.txt') as src:
    head = list(islice(src, 1000))
# Two variants of the result; the second assignment replaces the first.
top1000 = [line.strip() for line in head]     # whole lines
top1000 = [line.split()[1] for line in head]  # second column only

参考/学习链接：

29 9 月, 2014

admin

Programing

Python, sorted, split