Twisted is an event-driven networking engine written in Python and licensed under the open source MIT license.
Refer:
The event loop at the core of your program.
Some basic functions of the core reactor, from twisted/internet/base.py:
class ReactorBase(object):
    # Excerpt: only the constructor is shown.
    def __init__(self):
        # Bookkeeping for queued thread calls, event triggers and timed calls.
        self.threadCallQueue = []
        self._eventTriggers = {}
        self._pendingTimedCalls = []
        self._newTimedCalls = []
        self._cancellations = 0
        # Lifecycle flags; all start out False.
        self.running = False
        self._started = False
        self._justStopped = False
        self._startedBefore = False
        # reactor internal readers, e.g. the waker.
        self._internalReaders = set()
        self.waker = None

        # Arrange for the running attribute to change to True at the right time
        # and let a subclass possibly do other things at that time (eg install
        # signal handlers).
        self.addSystemEventTrigger(
            'during', 'startup', self._reallyStartRunning)
        # Two 'during shutdown' triggers: crash() and disconnectAll().
        self.addSystemEventTrigger('during', 'shutdown', self.crash)
        self.addSystemEventTrigger('during', 'shutdown', self.disconnectAll)

        # Thread support is initialised only when the platform reports it.
        if platform.supportsThreads():
            self._initThreads()
        self.installWaker()
@implementer(IReactorCore, IReactorTime, IReactorPluggableResolver)
class ReactorBase(object):
    def fireSystemEvent(self, eventType):
        """See twisted.internet.interfaces.IReactorCore.fireSystemEvent.
        """
        # Unknown event types are silently ignored.
        event = self._eventTriggers.get(eventType)
        if event is not None:
            event.fireEvent()

    def startRunning(self):
        # Raises ReactorAlreadyRunning if already started, and
        # ReactorNotRestartable if the reactor was started before.
        if self._started:
            raise error.ReactorAlreadyRunning()
        if self._startedBefore:
            raise error.ReactorNotRestartable()
        self._started = True
        self._stopped = False
        if self._registerAsIOThread:
            threadable.registerAsIOThread()
        self.fireSystemEvent('startup')


class _SignalReactorMixin(object):
    def run(self, installSignalHandlers=True):
        # Start the reactor, then block in the main loop.
        self.startRunning(installSignalHandlers=installSignalHandlers)
        self.mainLoop()

    def mainLoop(self):
        # The outer loop re-enters the inner loop after an exception has been
        # logged, for as long as self._started stays true.
        while self._started:
            try:
                while self._started:
                    # Advance simulation time in delayed event
                    # processors.
                    self.runUntilCurrent()
                    t2 = self.timeout()
                    # t is t2 while self.running is truthy, otherwise it is
                    # the (falsy) value of self.running.
                    t = self.running and t2
                    self.doIteration(t)
            except:
                log.msg("Unexpected error in main loop.")
                log.err()
            else:
                log.msg('Main loop terminated.')

    # NOTE(review): the excerpt lists runUntilCurrent right after mainLoop, but
    # it uses the attributes initialised in ReactorBase.__init__
    # (threadCallQueue, _pendingTimedCalls, ...), so it presumably belongs to
    # ReactorBase -- confirm against twisted/internet/base.py.
    def runUntilCurrent(self):
        """Run all pending timed calls.
        """
        if self.threadCallQueue:
            # Keep track of how many calls we actually make, as we're
            # making them, in case another call is added to the queue
            # while we're in this loop.
            count = 0
            total = len(self.threadCallQueue)
            for (f, a, kw) in self.threadCallQueue:
                try:
                    f(*a, **kw)
                except:
                    log.err()
                count += 1
                if count == total:
                    break
            # Drop only the calls that were made; later additions stay queued.
            del self.threadCallQueue[:count]
            if self.threadCallQueue:
                self.wakeUp()

        # insert new delayed calls now
        self._insertNewDelayedCalls()

        now = self.seconds()
        # _pendingTimedCalls is a heap; pop every call whose time has passed.
        while self._pendingTimedCalls and (self._pendingTimedCalls[0].time <= now):
            call = heappop(self._pendingTimedCalls)
            if call.cancelled:
                self._cancellations-=1
                continue

            # A call with outstanding delay is re-activated and pushed back.
            if call.delayed_time > 0:
                call.activate_delay()
                heappush(self._pendingTimedCalls, call)
                continue

            try:
                call.called = 1
                call.func(*call.args, **call.kw)
            except:
                log.deferr()
                # If the call recorded where it was created, log that too.
                if hasattr(call, "creator"):
                    e = "\n"
                    e += " C: previous exception occurred in " + \
                         "a DelayedCall created here:\n"
                    e += " C:"
                    e += "".join(call.creator).rstrip().replace("\n","\n C:")
                    e += "\n"
                    log.msg(e)

        # Once more than 50 cancelled calls make up over half of the heap,
        # rebuild the heap without them.
        if (self._cancellations > 50 and
             self._cancellations > len(self._pendingTimedCalls) >> 1):
            self._cancellations = 0
            self._pendingTimedCalls = [x for x in self._pendingTimedCalls
                                       if not x.cancelled]
            heapify(self._pendingTimedCalls)

        # Fire the "shutdown" event once, on the pass after _justStopped is set.
        if self._justStopped:
            self._justStopped = False
            self.fireSystemEvent("shutdown")
def callLater(self, _seconds, _f, *args, **kw):
    """See twisted.internet.interfaces.IReactorTime.callLater.
    """
    assert callable(_f), "%s is not callable" % _f
    assert _seconds >= 0, \
        "%s is not greater than or equal to 0 seconds" % (_seconds,)
    # The call's due time is the current reactor time plus the delay.
    tple = DelayedCall(self.seconds() + _seconds, _f, args, kw,
                       self._cancelCallLater,
                       self._moveCallLaterSooner,
                       seconds=self.seconds)
    # Queued on _newTimedCalls rather than inserted into the pending heap here.
    self._newTimedCalls.append(tple)
    return tple
Timeouts, repeated events, and more: when you want things to happen later.
Like callback functions, only a lot better. Deferreds are Twisted’s preferred mechanism for controlling the flow of asynchronous code: we still need a way of saying “do this only when that has finished”.
Below are some basic functions of the defer API:
def succeed(result):
    # Return a new Deferred whose callback has already been fired with result.
    d = Deferred()
    d.callback(result)
    return d


def fail(result=None):
    # Return a new Deferred whose errback has already been fired with result.
    d = Deferred()
    d.errback(result)
    return d


def maybeDeferred(f, *args, **kw):
    # Call f(*args, **kw) and always return a Deferred:
    #   - f raised          -> fail() with a Failure built in the except block
    #   - f gave a Deferred -> that Deferred, unchanged
    #   - f gave a Failure  -> fail() with it
    #   - anything else     -> succeed() with it
    try:
        result = f(*args, **kw)
    except:
        return fail(failure.Failure(captureVars=Deferred.debug))

    if isinstance(result, Deferred):
        return result
    elif isinstance(result, failure.Failure):
        return fail(result)
    else:
        return succeed(result)
def __init__(self, canceller=None):
    # Start with an empty callback chain and remember the optional canceller.
    self.callbacks = []
    self._canceller = canceller
    if self.debug:
        # In debug mode, record the stack that created this object
        # (the last frame is sliced off).
        self._debugInfo = DebugInfo()
        self._debugInfo.creator = traceback.format_stack()[:-1]

def addCallbacks(self, callback, errback=None,
                 callbackArgs=None, callbackKeywords=None,
                 errbackArgs=None, errbackKeywords=None):
    # Append one (callback, errback) pair to the chain; passthru stands in
    # when no errback is given. Returns self so calls can be chained.
    assert callable(callback)
    assert errback == None or callable(errback)
    cbs = ((callback, callbackArgs, callbackKeywords),
           (errback or (passthru), errbackArgs, errbackKeywords))
    self.callbacks.append(cbs)

    # If self.called is already set, run the chain immediately.
    if self.called:
        self._runCallbacks()
    return self

def addCallback(self, callback, *args, **kw):
    # Callback-only convenience wrapper around addCallbacks().
    return self.addCallbacks(callback, callbackArgs=args,
                             callbackKeywords=kw)

def addErrback(self, errback, *args, **kw):
    # Errback-only convenience wrapper; passthru fills the callback slot.
    return self.addCallbacks(passthru, errback,
                             errbackArgs=args,
                             errbackKeywords=kw)

def callback(self, result):
    # result must not itself be a Deferred.
    assert not isinstance(result, Deferred)
    self._startRunCallbacks(result)
TCP servers, TCP clients, UDP networking, and using processes.
The lxml XML toolkit is a Pythonic binding for the C libraries libxml2 and libxslt:
The two most basic functions in the lxml package for parsing XML and HTML:
refer: http://lxml.de/parsing.html
etree.parse returns an lxml.etree._ElementTree object.
import StringIO
from lxml import etree

# Wrap the XML text in a file-like object and hand it to etree.parse().
xml_source = StringIO.StringIO('<foo><bar></bar></foo>')
tree = etree.parse(xml_source)
from lxml import etree

# Here etree.parse() is given a file path instead of a file-like object.
xml_path = "doc/test.xml"
tree = etree.parse(xml_path)
import StringIO
from lxml import etree

xml_source = StringIO.StringIO('<foo><bar></bar></foo>')

# Each item produced by iterparse() is an (action, element) pair.
for event in etree.iterparse(xml_source):
    action, elem = event
    print("%s: %s" % (action, elem.tag))
end: bar end: foo
import StringIO
from lxml import etree
from lxml.html import HTMLParser

# Deliberately malformed markup: unclosed tags and a mismatched </h3>.
broken_html = "<html><head><title>test<body><h1>page title</h3>"

html_source = StringIO.StringIO(broken_html)
tree = etree.parse(html_source, HTMLParser())

# Serialise the parsed tree back to HTML text and show it.
root = tree.getroot()
result = etree.tostring(root, pretty_print=True, method="html")
print(result)
from lxml import etree
from lxml.html import HTMLParser

# Parse an HTML file from disk using the HTML parser instead of the XML one.
tree = etree.parse('index.html', HTMLParser())
import StringIO
from lxml import etree

# Read the whole file first; the with-block closes it afterwards.
with open('index.html', 'r') as html_file:
    htmlcontent = html_file.read()

# html=True switches iterparse() to HTML parsing.
html_source = StringIO.StringIO(htmlcontent)
for action, elem in etree.iterparse(html_source, html=True):
    print("%s: %s" % (action, elem.tag))
XPath for python:
refer: http://www.w3schools.com/XPath/xpath_syntax.asp
XPath Expressions:
XPath function return:
import StringIO
from lxml import etree

f = StringIO.StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)

# Absolute path: <bar> directly under the root <foo>.
# xpath() returns a list, so len(r) gives the number of matches.
r = tree.xpath('/foo/bar')
print(r[0].tag)

# Relative path: the same query written as 'bar'.
r = tree.xpath('bar')
print(r[0].tag)
⇒output:
bar bar
<div title="buyer-name">Carson Busses</div> <span class="item-price">$29.95</span>
from lxml import html import requests page = requests.get('http://econpy.pythonanywhere.com/ex/001.html') tree = html.fromstring(page.text) #This will create a list of buyers: buyers = tree.xpath('//div[@title="buyer-name"]/text()') #This will create a list of prices prices = tree.xpath('//span[@class="item-price"]/text()') print 'Buyers: ', buyers print 'Prices: ', prices
⇒output:
Buyers: ['Carson Busses', 'Earl E. Byrd', 'Patty Cakes', 'Derri Anne Connecticut', 'Moe Dess', 'Leda Doggslife', 'Dan Druff', 'Al Fresco', 'Ido Hoe', 'Howie Kisses', 'Len Lease', 'Phil Meup', 'Ira Pent', 'Ben D. Rules', 'Ave Sectomy', 'Gary Shattire', 'Bobbi Soks', 'Sheila Takya', 'Rose Tattoo', 'Moe Tell'] Prices: ['$29.95', '$8.37', '$15.26', '$19.25', '$19.25', '$13.99', '$31.57', '$8.49', '$14.47', '$15.86', '$11.11', '$15.98', '$16.27', '$7.50', '$50.85', '$14.26', '$5.68', '$15.00', '$114.07', '$10.09']
from HTMLParser import HTMLParser # create a subclass and override the handler methods class MyHTMLParser(HTMLParser): def handle_starttag(self, tag, attrs): print "Encountered a start tag:", tag def handle_endtag(self, tag): print "Encountered an end tag :", tag def handle_data(self, data): print "Encountered some data :", data # instantiate the parser and fed it some HTML parser = MyHTMLParser() parser.feed('''<html><head><title>Test</title></head> <body><h1>Parse me!</h1></body></html>''')
output:
Encountered a start tag: html Encountered a start tag: head Encountered a start tag: title Encountered some data : Test Encountered an end tag : title Encountered an end tag : head Encountered some data : Encountered a start tag: body Encountered a start tag: h1 Encountered some data : Parse me! Encountered an end tag : h1 Encountered an end tag : body Encountered an end tag : html
refer: http://lxml.de/lxmlhtml.html
from lxml import html, etree
from lxml.html import HTMLParser
import StringIO
import requests

# The URL is handed straight to html.parse(); the result is saved locally.
url = 'http://www.google.com'
document = html.parse(url)
document.write('index.html')
Or
from lxml import html
import requests

# Fetch the page with requests, then parse the response text.
response = requests.get('http://www.google.com')
tree = html.fromstring(response.text)

# '//title' matches <title> elements anywhere in the document.
titles = tree.xpath('//title')
print(titles[0].text)
from lxml import html as HTML

# Parse the previously saved page from disk.
tree = HTML.parse('index.html')

# Show both the tag name and the text of the first <title> match.
title = tree.xpath('//title')[0]
print(title.tag)
print(title.text)
import StringIO
from lxml import etree
from lxml.html import HTMLParser

# Malformed on purpose: nothing is closed and </h3> does not match <h1>.
broken_html = "<html><head><title>test<body><h1>page title</h3>"

tree = etree.parse(StringIO.StringIO(broken_html), HTMLParser())
document_root = tree.getroot()

# Write the parsed tree back out as HTML.
result = etree.tostring(document_root, pretty_print=True, method="html")
print(result)
from xml.etree import ElementTree as ET

# Target document:
#
#   <?xml version="1.0"?>
#   <data>
#       <country name="Liechtenstein">
#           <rank>1</rank>
#           <year>2008</year>
#       </country>
#       <country name="Singapore">
#           <rank>4</rank>
#           <year>2011</year>
#       </country>
#   </data>

# (name, rank, year) for each <country>, in document order.
COUNTRIES = (
    ('Liechtenstein', '1', '2008'),
    ('Singapore', '4', '2011'),
)

data = ET.Element('data')
for name, rank, year in COUNTRIES:
    country = ET.SubElement(data, 'country', {'name': name})
    ET.SubElement(country, 'rank').text = rank
    ET.SubElement(country, 'year').text = year

print(ET.tostring(data))
output:
<data><country name="Liechtenstein"><rank>1</rank><year>2008</year></country><country name="Singapore"><rank>4</rank><year>2011</year></country></data>
from lxml import etree as ET

# Target document:
#
#   <?xml version="1.0"?>
#   <data>
#       <country name="Liechtenstein">
#           <rank>1</rank>
#           <year>2008</year>
#       </country>
#       <country name="Singapore">
#           <rank>4</rank>
#           <year>2011</year>
#       </country>
#   </data>

data = ET.Element('data')

# Add each <country> with its <rank> and <year> children, in document order.
for country_name, rank_text, year_text in [('Liechtenstein', '1', '2008'),
                                           ('Singapore', '4', '2011')]:
    country = ET.SubElement(data, 'country', {'name': country_name})
    rank = ET.SubElement(country, 'rank')
    rank.text = rank_text
    year = ET.SubElement(country, 'year')
    year.text = year_text

print(ET.tostring(data))
output:
<data><country name="Liechtenstein"><rank>1</rank><year>2008</year></country><country name="Singapore"><rank>4</rank><year>2011</year></country></data>
from lxml.html import HtmlElement
from lxml import etree
from lxml import html as HTML

tree = HTML.parse('index.html')

# First <div id="content"> in the document.
content_div = tree.xpath('//div[@id="content"]')[0]

# Serialise that subtree with an explicit UTF-8 encoding.
print(etree.tostring(content_div, pretty_print=True, encoding='utf-8'))
from lxml.html import HtmlElement
from lxml import etree
from lxml import html as HTML

tree = HTML.parse('index.html')

# First <div id="content"> in the document.
matches = tree.xpath('//div[@id="content"]')
content_div = matches[0]

# Serialise that subtree using the HTML output method.
print(etree.tostring(content_div, pretty_print=True, method="html"))
from lxml import html, etree
from lxml.html import HTMLParser
import StringIO
import requests

# Download the page, parse its text with the HTML parser, and save the
# result as HTML.
response = requests.get('http://shop.babies.vn')
markup = StringIO.StringIO(response.text)
tree = etree.parse(markup, HTMLParser())
tree.write('index.html', method='html')
Or
from lxml import html, etree
from lxml.html import HTMLParser
import StringIO
import requests

# The URL is handed straight to html.parse(); the tree is written as HTML.
url = 'http://shop.babies.vn'
document = html.parse(url)
document.write('index.html', method='html')
To use the re package, we first need to import it:
import re
A regular expression (abbreviated regex or regexp) is a sequence of characters that forms a search pattern.
refer:
Match Character
Anchors: cause a match to succeed or fail depending on the current position in the string
Grouping constructs: Grouping constructs delineate subexpressions of a regular expression and typically capture substrings of an input string
Quantifier: A quantifier specifies how many instances of the previous element (which can be a character, a group, or a character class) must be present in the input string for a match to occur
refer: http://www.pythonforbeginners.com/regex/regular-expressions-in-python
re Flags:
# flags I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
findall: findall() is probably the single most powerful function in the re module.
# Named `text` rather than `str` so the builtin str type is not shadowed.
text = 'purple alice@google.com, blah monkey bob@abc.com blah dishwasher'

## Here re.findall() returns a list of all the found email strings
emails = re.findall(r'[\w\.-]+@[\w\.-]+', text)  ## ['alice@google.com', 'bob@abc.com']
for email in emails:
    # do something with each found email string
    print(email)
Understanding the pattern syntax above:
# Open file; the with-block closes it again, even if reading or matching fails
with open('test.txt', 'r') as f:
    # Feed the file text into findall(); it returns a list of all the found strings
    strings = re.findall(r'some pattern', f.read())
# Named `text` rather than `str` so the builtin str type is not shadowed.
text = 'an example word:cat!!'

# "word:" followed by exactly three word characters (\w\w\w).
match = re.search(r'word:\w\w\w', text)

# If-statement after search() tests if it succeeded
if match:
    print('found %s' % match.group())  ## 'found word:cat'
else:
    print('did not find')
As you can see in the example below, I have used the | operator, which searches for either of the patterns I specify.
import re

programming = ["Python", "Perl", "PHP", "C++"]

# Four alternatives: starts with B, starts with P, ends with i, ends with H.
pat = "^B|^P|i$|H$"
matcher = re.compile(pat, re.IGNORECASE)

for lang in programming:
    verdict = "FOUND" if matcher.search(lang) else "NOT FOUND"
    print("%s %s" % (lang, verdict))
The output of above script will be:
Python FOUND Perl FOUND PHP FOUND C++ NOT FOUND
import re

text = "The Attila the Hun Show"

# Raw strings keep the backslashes literal; "\w" and "\d" are not valid
# escapes in an ordinary string literal.
patterns = [
    r".",    # a single character
    r".*",   # any string of characters
    r"\w+",  # a string of letters (at least one)
    r"\d+",  # a string of digits
]

# (pattern, matched text) for every pattern that matches at the start of text.
results = []
for pattern in patterns:
    m = re.match(pattern, text)
    if m:
        results.append((pattern, m.group(0)))
        print("%r => %r" % (pattern, m.group(0)))
output:
'.' => 'T' '.*' => 'The Attila the Hun Show' '\\w+' => 'The'
import re

# Compiled once as a raw string: two digits / two digits / two-to-four digits.
DATE_PATTERN = re.compile(r"(\d{2})/(\d{2})/(\d{2,4})")

print('**********************************')
text = "10/15/99"

# The date starts the string, so match() and search() both succeed.
print("match1:")
m1 = DATE_PATTERN.match(text)
if m1:
    print(m1.group(1, 2, 3))

print("search1:")
s1 = DATE_PATTERN.search(text)
if s1:
    print(s1.group(1, 2, 3))

print('**********************************')
text = "hello 10/15/99"

# match() only tries the start of the string, so it fails here;
# search() scans forward and still finds the date.
print("match2:")
m2 = DATE_PATTERN.match(text)
if m2:
    print(m2.group(1, 2, 3))

print("search2:")
s2 = DATE_PATTERN.search(text)
if s2:
    print(s2.group(1, 2, 3))
output:
********************************** match1: ('10', '15', '99') search1: ('10', '15', '99') ********************************** match2: search2: ('10', '15', '99')
import re

text = "Python for beginner is a very cool website"

# Swap every occurrence of "cool" for "good".
cool_pattern = re.compile("cool")
text2 = cool_pattern.sub("good", text)
print(text2)
output
Python for beginner is a very good website
# Named `text` rather than `str` so the builtin str type is not shadowed.
text = 'purple alice@google.com, blah monkey bob@abc.com blah dishwasher'

## re.sub(pat, replacement, text) -- returns new string with all replacements,
## \1 is group(1), \2 group(2) in the replacement
result = re.sub(r'([\w.-]+)@([\w.-]+)', r'\1@yo-yo-dyne.com', text)
print(result)
## purple alice@yo-yo-dyne.com, blah monkey bob@yo-yo-dyne.com blah dishwasher
output:
purple alice@yo-yo-dyne.com, blah monkey bob@yo-yo-dyne.com blah dishwasher
import re name_check = re.compile(r"[^A-Za-zs.]") name = raw_input ("Please, enter your name: ") while name_check.search(name): print "Please enter your name correctly!" name = raw_input ("Please, enter your name: ")
import re phone_check = re.compile(r"[^0-9s-()]") phone = raw_input ("Please, enter your phone: ") while phone_check.search(phone): print "Please enter your phone correctly!" phone = raw_input ("Please, enter your phone: ")
The output of above script will be:
Please, enter your phone: s Please enter your phone correctly!
It will continue to ask until you put in numbers only.