python:internet
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
python:internet [2014/08/06 04:17] – [urllib2.build_opener(replace urlopen) and post with cookies] admin | python:internet [2022/10/29 16:15] (current) – external edit 127.0.0.1 | ||
---|---|---|---|
Line 131: | Line 131: | ||
</ | </ | ||
</ | </ | ||
- | ===== httplib and urllib2 ===== | + | ===== httplib and urllib2 (with addinfourl) ===== |
* The basic module for processing HTTP is the httplib package | * The basic module for processing HTTP is the httplib package | ||
* urllib2 is a Python module that can be used for fetching URLs. It defines functions and classes to help with URL actions (basic and digest authentication, | * urllib2 is a Python module that can be used for fetching URLs. It defines functions and classes to help with URL actions (basic and digest authentication, | ||
Line 287: | Line 287: | ||
with open(' | with open(' | ||
f.write(data) | f.write(data) | ||
- | </ | + | </ |
POST / | POST / | ||
Accept-Encoding: | Accept-Encoding: | ||
Line 328: | Line 328: | ||
with open(' | with open(' | ||
f.write(data) | f.write(data) | ||
- | </ | + | </ |
POST / | POST / | ||
Accept-Encoding: | Accept-Encoding: | ||
Line 347: | Line 347: | ||
User-Agent: Python-urllib/ | User-Agent: Python-urllib/ | ||
</ | </ | ||
+ | ==== Get Cookie Information ==== | ||
+ | <code python> | ||
+ | import urllib, urllib2 | ||
+ | import cookielib | ||
+ | |||
+ | loginurl = ' | ||
+ | params = urllib.urlencode({' | ||
+ | headers = {" | ||
+ | " | ||
+ | req = urllib2.Request(loginurl, | ||
+ | |||
+ | cj = cookielib.CookieJar() | ||
+ | opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) | ||
+ | response = opener.open(req) | ||
+ | |||
+ | for cookie in cj: | ||
+ | print cookie.name, | ||
+ | data = response.read() | ||
+ | with open(' | ||
+ | f.write(data) | ||
+ | </ | ||
+ | currency VND | ||
+ | language vn | ||
+ | PHPSESSID 01eg0u7uf5bm3r01h6pnrv3q33 | ||
+ | </ | ||
+ | ==== Post multipart form data ==== | ||
+ | === post with encode_multipart_formdata === | ||
+ | Post with httplib< | ||
+ | import httplib, mimetypes | ||
+ | |||
+ | def post_multipart(host, | ||
+ | """ | ||
+ | Post fields and files to an http host as multipart/ | ||
+ | fields is a sequence of (name, value) elements for regular form fields. | ||
+ | files is a sequence of (name, filename, value) elements for data to be uploaded as files | ||
+ | Return the server' | ||
+ | """ | ||
+ | content_type, | ||
+ | h = httplib.HTTP(host) | ||
+ | h.putrequest(' | ||
+ | h.putheader(' | ||
+ | h.putheader(' | ||
+ | h.endheaders() | ||
+ | h.send(body) | ||
+ | errcode, errmsg, headers = h.getreply() | ||
+ | return h.file.read() | ||
+ | |||
+ | def encode_multipart_formdata(fields, | ||
+ | """ | ||
+ | fields is a sequence of (name, value) elements for regular form fields. | ||
+ | files is a sequence of (name, filename, value) elements for data to be uploaded as files | ||
+ | Return (content_type, | ||
+ | """ | ||
+ | BOUNDARY = ' | ||
+ | CRLF = ' | ||
+ | L = [] | ||
+ | for (key, value) in fields: | ||
+ | L.append(' | ||
+ | L.append(' | ||
+ | L.append('' | ||
+ | L.append(value) | ||
+ | for (key, filename, value) in files: | ||
+ | L.append(' | ||
+ | L.append(' | ||
+ | L.append(' | ||
+ | L.append('' | ||
+ | L.append(value) | ||
+ | L.append(' | ||
+ | L.append('' | ||
+ | body = CRLF.join(L) | ||
+ | content_type = ' | ||
+ | return content_type, | ||
+ | |||
+ | def get_content_type(filename): | ||
+ | return mimetypes.guess_type(filename)[0] or ' | ||
+ | </ | ||
+ | === fix encode_multipart_formdata for posting binary file === | ||
+ | <code python> | ||
+ | def encode_multipart_formdata(fields, | ||
+ | """ | ||
+ | fields is a sequence of (name, value) elements for regular form fields. | ||
+ | files is a sequence of (name, filename, value) elements for data to be uploaded as files | ||
+ | Return (content_type, | ||
+ | """ | ||
+ | | ||
+ | buf = StringIO() | ||
+ | boundary = mimetools.choose_boundary() | ||
+ | for (key, value) in fields: | ||
+ | buf.write(' | ||
+ | buf.write(' | ||
+ | buf.write(' | ||
+ | for (key, filename, value) in files: | ||
+ | contenttype = mimetypes.guess_type(filename)[0] or ' | ||
+ | buf.write(' | ||
+ | buf.write(' | ||
+ | buf.write(' | ||
+ | buf.write(' | ||
+ | buf.write(' | ||
+ | buf = buf.getvalue() | ||
+ | content_type = ' | ||
+ | return content_type, | ||
+ | </ | ||
+ | === Post using MultipartPostHandler === | ||
+ | * Install:< | ||
+ | pip install MultipartPostHandler | ||
+ | </ | ||
+ | * example:< | ||
+ | import MultipartPostHandler, | ||
+ | |||
+ | cookies = cookielib.CookieJar() | ||
+ | opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies), | ||
+ | MultipartPostHandler.MultipartPostHandler) | ||
+ | params = { " | ||
+ | " | ||
+ | opener.open(" | ||
+ | </ | ||
+ | ===== twisted internet ===== | ||
+ | ==== Simple Request ==== | ||
+ | <code python> | ||
+ | from sys import argv | ||
+ | from pprint import pformat | ||
+ | |||
+ | from twisted.internet.task import react | ||
+ | from twisted.web.client import Agent, readBody | ||
+ | from twisted.web.http_headers import Headers | ||
+ | |||
+ | |||
+ | def cbRequest(response): | ||
+ | print ' | ||
+ | print ' | ||
+ | print ' | ||
+ | print ' | ||
+ | print pformat(list(response.headers.getAllRawHeaders())) | ||
+ | d = readBody(response) | ||
+ | d.addCallback(cbBody) | ||
+ | return d | ||
+ | |||
+ | def cbBody(body): | ||
+ | print ' | ||
+ | print body | ||
+ | |||
+ | def main(reactor, | ||
+ | agent = Agent(reactor) | ||
+ | d = agent.request( | ||
+ | ' | ||
+ | Headers({' | ||
+ | None) | ||
+ | d.addCallback(cbRequest) | ||
+ | return d | ||
+ | |||
+ | react(main, argv[1:]) | ||
+ | </ | ||
+ |
python/internet.1407298625.txt.gz · Last modified: 2022/10/29 16:15 (external edit)