User Tools

Site Tools


python:internet

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision | Previous revision
Next revision
Previous revision
python:internet [2014/08/06 04:15] – [urllib2.build_opener(replace urlopen) and post with cookies] adminpython:internet [2022/10/29 16:15] (current) – external edit 127.0.0.1
Line 131: Line 131:
 </BODY></HTML> </BODY></HTML>
 </code> </code>
-===== httplib and urllib2 =====+===== httplib and urllib2(with addinfourl) =====
   * The basic module for processing HTTP is the httplib package   * The basic module for processing HTTP is the httplib package
   * urllib2 is a Python module that can be used for fetching URLs. It defines functions and classes to help with URL actions (basic and digest authentication, redirections, cookies, etc). urllib2 handles the object types below: <code python>   * urllib2 is a Python module that can be used for fetching URLs. It defines functions and classes to help with URL actions (basic and digest authentication, redirections, cookies, etc). urllib2 handles the object types below: <code python>
Line 280: Line 280:
 headers = {"Content-type": "application/x-www-form-urlencoded", headers = {"Content-type": "application/x-www-form-urlencoded",
            "Accept": "text/plain"}            "Accept": "text/plain"}
 +
 req = urllib2.Request(loginurl, data = params, headers = headers) req = urllib2.Request(loginurl, data = params, headers = headers)
 response = urllib2.urlopen(req) response = urllib2.urlopen(req)
-print 'headers:', response.info()+
 data = response.read() data = response.read()
 with open('loginresult.html', 'wb') as f: with open('loginresult.html', 'wb') as f:
     f.write(data)     f.write(data)
 +</code>Debug in httplib.HTTPConnection send:<code>
 +POST /index.php?route=account/login HTTP/1.1
 +Accept-Encoding: identity
 +Content-Length: 122
 +Host: dacsan.babies.vn
 +Accept: text/plain
 +User-Agent: Python-urllib/2.7
 +Connection: close
 +Content-Type: application/x-www-form-urlencoded
 +
 +redirect=http%3A%2F%2Fdacsan.babies.vn%2Findex.php%3Froute%3Daccount%2Faccount&password=8941362&email=itanhchi%40yahoo.com
 +GET /index.php?route=account/account HTTP/1.1
 +Accept-Encoding: identity
 +Host: dacsan.babies.vn
 +Connection: close
 +Accept: text/plain
 +User-Agent: Python-urllib/2.7
 +
 +
 +GET /index.php?route=account/login HTTP/1.1
 +Accept-Encoding: identity
 +Host: dacsan.babies.vn
 +Connection: close
 +Accept: text/plain
 +User-Agent: Python-urllib/2.7
 </code> </code>
   * Send a login request to OpenCart with a cookie header:<code python>   * Send a login request to OpenCart with a cookie header:<code python>
Line 302: Line 328:
 with open('loginresult.html', 'wb') as f: with open('loginresult.html', 'wb') as f:
     f.write(data)     f.write(data)
-</code>Debug in httplib send:<code>+</code>Debug in httplib.HTTPConnection send:<code>
 POST /index.php?route=account/login HTTP/1.1 POST /index.php?route=account/login HTTP/1.1
 Accept-Encoding: identity Accept-Encoding: identity
Line 321: Line 347:
 User-Agent: Python-urllib/2.7 User-Agent: Python-urllib/2.7
 </code> </code>
 +==== Get Cookie Information ====
 +<code python>
 +import urllib, urllib2
 +import cookielib
 + 
 +loginurl = 'http://dacsan.babies.vn/index.php?route=account/login'
 +params = urllib.urlencode({'email': '[email protected]', 'password':'8941362', 'redirect':'http://dacsan.babies.vn/index.php?route=account/account'})
 +headers = {"Content-type": "application/x-www-form-urlencoded",
 +           "Accept": "text/plain"}
 +req = urllib2.Request(loginurl, data = params, headers = headers)
 +
 +cj = cookielib.CookieJar() 
 +opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) 
 +response = opener.open(req)
 +
 +for cookie in cj:
 +    print cookie.name, cookie.value
 +data = response.read()
 +with open('loginresult.html', 'wb') as f:
 +    f.write(data)
 +</code>output:<code>
 +currency VND
 +language vn
 +PHPSESSID 01eg0u7uf5bm3r01h6pnrv3q33
 +</code>
 +==== Post multipart form data ====
 +=== post with encode_multipart_formdata ===
 +Post with httplib<code python>
 +import httplib, mimetypes
 +
 +def post_multipart(host, selector, fields, files):
 +    """
 +    Post fields and files to an http host as multipart/form-data.
 +    fields is a sequence of (name, value) elements for regular form fields.
 +    files is a sequence of (name, filename, value) elements for data to be uploaded as files
 +    Return the server's response page.
 +    """
 +    content_type, body = encode_multipart_formdata(fields, files)
 +    h = httplib.HTTP(host)
 +    h.putrequest('POST', selector)
 +    h.putheader('content-type', content_type)
 +    h.putheader('content-length', str(len(body)))
 +    h.endheaders()
 +    h.send(body)
 +    errcode, errmsg, headers = h.getreply()
 +    return h.file.read()
 +
 +def encode_multipart_formdata(fields, files):
 +    """
 +    fields is a sequence of (name, value) elements for regular form fields.
 +    files is a sequence of (name, filename, value) elements for data to be uploaded as files
 +    Return (content_type, body) ready for httplib.HTTP instance
 +    """
 +    BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
 +    CRLF = '\r\n'
 +    L = []
 +    for (key, value) in fields:
 +        L.append('--' + BOUNDARY)
 +        L.append('Content-Disposition: form-data; name="%s"' % key)
 +        L.append('')
 +        L.append(value)
 +    for (key, filename, value) in files:
 +        L.append('--' + BOUNDARY)
 +        L.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename))
 +        L.append('Content-Type: %s' % get_content_type(filename))
 +        L.append('')
 +        L.append(value)
 +    L.append('--' + BOUNDARY + '--')
 +    L.append('')
 +    body = CRLF.join(L)
 +    content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
 +    return content_type, body
 +
 +def get_content_type(filename):
 +    return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
 +</code>
 +=== fix encode_multipart_formdata for posting binary file ===
 +<code python>
 +def encode_multipart_formdata(fields, files):
 +    """
 +    fields is a sequence of (name, value) elements for regular form fields.
 +    files is a sequence of (name, filename, value) elements for data to be uploaded as files
 +    Return (content_type, body) ready for httplib.HTTP instance
 +    """    
 +    
 +    buf = StringIO()
 +    boundary = mimetools.choose_boundary()            
 +    for (key, value) in fields:
 +        buf.write('--%s\r\n' % boundary)
 +        buf.write('Content-Disposition: form-data; name="%s"' % key)
 +        buf.write('\r\n\r\n' + value + '\r\n')
 +    for (key, filename, value) in files:
 +        contenttype = mimetypes.guess_type(filename)[0] or 'application/octet-stream'
 +        buf.write('--%s\r\n' % boundary)
 +        buf.write('Content-Disposition: form-data; name="%s"; filename="%s"\r\n' % (key, filename))
 +        buf.write('Content-Type: %s\r\n' % contenttype)
 +        buf.write('\r\n' + value + '\r\n')
 +    buf.write('--' + boundary + '--\r\n\r\n')
 +    buf = buf.getvalue()
 +    content_type = 'multipart/form-data; boundary=%s' % boundary
 +    return content_type, buf
 +</code>
 +=== Post using MultipartPostHandler ===
 +  * Install:<code bash>
 +pip install MultipartPostHandler
 +</code>
 +  * example:<code python>
 +import MultipartPostHandler, urllib2, cookielib
 +
 +cookies = cookielib.CookieJar()
 +opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies),
 +                                MultipartPostHandler.MultipartPostHandler)
 +params = { "username" : "bob", "password" : "riviera",
 +           "file" : open("filename", "rb") }
 +opener.open("http://wwww.bobsite.com/upload/", params)
 +</code>
 +===== twisted internet =====
 +==== Simple Request ====
 +<code python>
 +from sys import argv
 +from pprint import pformat
 +
 +from twisted.internet.task import react
 +from twisted.web.client import Agent, readBody
 +from twisted.web.http_headers import Headers
 +
 +
 +def cbRequest(response):
 +    print 'Response version:', response.version
 +    print 'Response code:', response.code
 +    print 'Response phrase:', response.phrase
 +    print 'Response headers:'
 +    print pformat(list(response.headers.getAllRawHeaders()))
 +    d = readBody(response)
 +    d.addCallback(cbBody)
 +    return d
 +
 +def cbBody(body):
 +    print 'Response body:'
 +    print body
 +
 +def main(reactor, url=b"http://www.google.com/"):
 +    agent = Agent(reactor)
 +    d = agent.request(
 +        'GET', url,
 +        Headers({'User-Agent': ['Twisted Web Client Example']}),
 +        None)
 +    d.addCallback(cbRequest)
 +    return d
 +
 +react(main, argv[1:])
 +</code>
 +
python/internet.1407298545.txt.gz · Last modified: 2022/10/29 16:15 (external edit)