Python正则表达式在使用时有很多需要注意的问题,只有不断学习才能更好地解决这些问题。下面我们就来看看如何更好地学习Python正则表达式,希望大家有所收获。
Python代码
#encoding=utf-8
'''
python learn regular express
url : http://docs.python.org/library/re.html
parse html url : http://www.boddie.org.uk/python/HTML.html
author : liuzheng
'''
import re
import urllib
# Parse the JavaEye blog channel page.
class ParseHTML:
    '''
    Download an HTML page and print the anchor tags found in it.

    The page is fetched from ``url`` and scanned with LINK_PATTERN;
    the list of matches is printed to standard output.
    '''

    # Matches <a href='URL' title=... target=...>TEXT</a> (case-insensitive)
    # and captures, in order: the href, the raw title value, the raw target
    # value and the link text.  The pattern is written as two adjacent raw
    # literals, which Python concatenates into one string, so it can span
    # two source lines without embedding a newline in the regex.
    LINK_PATTERN = re.compile(
        r"<a href='([\w./:\\]+?)'[\s]*title=([^<>]+?)"
        r"[\s]*target=([^<>]+?)>([^<>]+?)</a>",
        re.I)

    def __init__(self, url):
        '''Remember the address of the page to parse.'''
        self.url = url

    # analyse html
    def parse(self):
        '''Fetch ``self.url`` and print every link matched in the page.'''
        # urllib.urlopen is the Python 2 API, matching the rest of this file.
        sock = urllib.urlopen(self.url)
        try:
            html = sock.read()
        finally:
            # Release the connection even when reading fails.
            sock.close()
        self.__puts(html)

    # print the data matched in the html
    def __puts(self, html):
        '''Print the list of (href, title, target, text) tuples in ``html``.'''
        m = self.LINK_PATTERN.findall(html)
        # NOTE(review): the original author asked whether there is an
        # encoding problem here.  Printing a list shows the repr() of each
        # element, so non-ASCII text is presumably displayed as escape
        # sequences on Python 2 - confirm, and print items one by one if
        # readable output is wanted.
        print(m)
def regex_demo_lines():
    '''
    Build the output lines of the basic ``re`` demonstrations.

    Runs one example each of match, split, findall and sub on fixed
    sample strings and returns the formatted result lines, in the order
    they are meant to be printed.
    '''
    lines = ["regular" + "* " * 30]

    # match
    phone = "800-820-8800"
    matched = re.match(r"(\d{3})-(\d{3})-(\d{4})", phone)
    # Joined with a single space, as the print statement did for two items.
    lines.append("%s %s" % ("result : ", matched.groups()))

    # split
    lines.append("split : %s" % re.split(r'\W', 'Words, words, words.'))

    # findall
    text = ("He was carefully disguised but captured quickly "
            "by police.")
    lines.append("findall :%s" % re.findall(r"\w+ly", text))

    # sub
    text = "hello world!"
    lines.append("sub:%s" % re.sub(r"\s+", "--", text))

    return lines


if __name__ == '__main__':
    url = "http://www.javaeye.com/blogs"
    p = ParseHTML(url)
    p.parse()
    if __debug__:
        print("debuging is %s" % __debug__)
    for line in regex_demo_lines():
        print(line)
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
- 22.
- 23.
- 24.
- 25.
- 26.
- 27.
- 28.
- 29.
- 30.
- 31.
- 32.
- 33.
- 34.
- 35.
- 36.
- 37.
- 38.
- 39.
- 40.
- 41.
- 42.
- 43.
- 44.
- 45.
- 46.
- 47.
- 48.
- 49.
- 50.
- 51.
- 52.
以上就是对Python正则表达式的详细介绍。
【编辑推荐】