python:internet
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
python:internet [2014/08/06 03:56] – [urllib2.build_opener(replace urlopen) and post with cookies] admin | python:internet [2022/10/29 16:15] (current) – external edit 127.0.0.1 | ||
---|---|---|---|
Line 131: | Line 131: | ||
</ | </ | ||
</ | </ | ||
- | ===== httplib and urllib2 ===== | + | ===== httplib and urllib2 (with addinfourl) ===== |
* The basic package for processing HTTP is httplib | * The basic package for processing HTTP is httplib | ||
* urllib2 is a Python module that can be used for fetching URLs. It defines functions and classes to help with URL actions (basic and digest authentication, | * urllib2 is a Python module that can be used for fetching URLs. It defines functions and classes to help with URL actions (basic and digest authentication, | ||
Line 252: | Line 252: | ||
response = opener.open(' | response = opener.open(' | ||
print response.read() | print response.read() | ||
+ | </ | ||
+ | import urllib, urllib2 | ||
+ | params = urllib.urlencode({' | ||
+ | headers = {" | ||
+ | " | ||
+ | req = urllib2.Request(" | ||
+ | |||
+ | opener = urllib2.build_opener(urllib2.HTTPHandler()) | ||
+ | response = opener.open(req) | ||
+ | print ' | ||
+ | print response.read() | ||
+ | </ | ||
+ | headers: Date: Wed, 06 Aug 2014 04:01:39 GMT | ||
+ | Server: BaseHTTP/ | ||
+ | Expires: Wed, 06 Aug 2014 04:01:38 GMT | ||
+ | Content-Type: | ||
+ | Cache-Control: | ||
+ | Via: 1.1 bugs.python.org | ||
+ | Connection: close | ||
+ | Transfer-Encoding: | ||
</ | </ | ||
- | * Send login to OpenCart:< | + | * Send normal |
import urllib, urllib2 | import urllib, urllib2 | ||
Line 260: | Line 280: | ||
headers = {" | headers = {" | ||
" | " | ||
+ | |||
req = urllib2.Request(loginurl, | req = urllib2.Request(loginurl, | ||
response = urllib2.urlopen(req) | response = urllib2.urlopen(req) | ||
- | print ' | + | |
data = response.read() | data = response.read() | ||
with open(' | with open(' | ||
f.write(data) | f.write(data) | ||
+ | </ | ||
+ | POST / | ||
+ | Accept-Encoding: | ||
+ | Content-Length: | ||
+ | Host: dacsan.babies.vn | ||
+ | Accept: text/plain | ||
+ | User-Agent: Python-urllib/ | ||
+ | Connection: close | ||
+ | Content-Type: | ||
+ | |||
+ | redirect=http%3A%2F%2Fdacsan.babies.vn%2Findex.php%3Froute%3Daccount%2Faccount& | ||
+ | GET / | ||
+ | Accept-Encoding: | ||
+ | Host: dacsan.babies.vn | ||
+ | Connection: close | ||
+ | Accept: text/plain | ||
+ | User-Agent: Python-urllib/ | ||
+ | |||
+ | |||
+ | GET / | ||
+ | Accept-Encoding: | ||
+ | Host: dacsan.babies.vn | ||
+ | Connection: close | ||
+ | Accept: text/plain | ||
+ | User-Agent: Python-urllib/ | ||
</ | </ | ||
+ | * Send login to OpenCart with cookies header:< | ||
+ | import urllib, urllib2 | ||
+ | |||
+ | loginurl = ' | ||
+ | params = urllib.urlencode({' | ||
+ | headers = {" | ||
+ | " | ||
+ | req = urllib2.Request(loginurl, | ||
+ | |||
+ | opener = urllib2.build_opener(urllib2.HTTPCookieProcessor()) | ||
+ | response = opener.open(req) | ||
+ | |||
+ | data = response.read() | ||
+ | with open(' | ||
+ | f.write(data) | ||
+ | </ | ||
+ | POST / | ||
+ | Accept-Encoding: | ||
+ | Content-Length: | ||
+ | Host: dacsan.babies.vn | ||
+ | Accept: text/plain | ||
+ | User-Agent: Python-urllib/ | ||
+ | Connection: close | ||
+ | Content-Type: | ||
+ | |||
+ | redirect=http%3A%2F%2Fdacsan.babies.vn%2Findex.php%3Froute%3Daccount%2Faccount& | ||
+ | GET / | ||
+ | Accept-Encoding: | ||
+ | Host: dacsan.babies.vn | ||
+ | Cookie: currency=VND; | ||
+ | Connection: close | ||
+ | Accept: text/plain | ||
+ | User-Agent: Python-urllib/ | ||
+ | </ | ||
+ | ==== Get Cookie Information ==== | ||
+ | <code python> | ||
+ | import urllib, urllib2 | ||
+ | import cookielib | ||
+ | |||
+ | loginurl = ' | ||
+ | params = urllib.urlencode({' | ||
+ | headers = {" | ||
+ | " | ||
+ | req = urllib2.Request(loginurl, | ||
+ | |||
+ | cj = cookielib.CookieJar() | ||
+ | opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) | ||
+ | response = opener.open(req) | ||
+ | |||
+ | for cookie in cj: | ||
+ | print cookie.name, | ||
+ | data = response.read() | ||
+ | with open(' | ||
+ | f.write(data) | ||
+ | </ | ||
+ | currency VND | ||
+ | language vn | ||
+ | PHPSESSID 01eg0u7uf5bm3r01h6pnrv3q33 | ||
+ | </ | ||
+ | ==== Post multipart form data ==== | ||
+ | === post with encode_multipart_formdata === | ||
+ | Post with httplib< | ||
+ | import httplib, mimetypes | ||
+ | |||
+ | def post_multipart(host, | ||
+ | """ | ||
+ | Post fields and files to an http host as multipart/ | ||
+ | fields is a sequence of (name, value) elements for regular form fields. | ||
+ | files is a sequence of (name, filename, value) elements for data to be uploaded as files | ||
+ | Return the server' | ||
+ | """ | ||
+ | content_type, | ||
+ | h = httplib.HTTP(host) | ||
+ | h.putrequest(' | ||
+ | h.putheader(' | ||
+ | h.putheader(' | ||
+ | h.endheaders() | ||
+ | h.send(body) | ||
+ | errcode, errmsg, headers = h.getreply() | ||
+ | return h.file.read() | ||
+ | |||
+ | def encode_multipart_formdata(fields, | ||
+ | """ | ||
+ | fields is a sequence of (name, value) elements for regular form fields. | ||
+ | files is a sequence of (name, filename, value) elements for data to be uploaded as files | ||
+ | Return (content_type, | ||
+ | """ | ||
+ | BOUNDARY = ' | ||
+ | CRLF = ' | ||
+ | L = [] | ||
+ | for (key, value) in fields: | ||
+ | L.append(' | ||
+ | L.append(' | ||
+ | L.append('' | ||
+ | L.append(value) | ||
+ | for (key, filename, value) in files: | ||
+ | L.append(' | ||
+ | L.append(' | ||
+ | L.append(' | ||
+ | L.append('' | ||
+ | L.append(value) | ||
+ | L.append(' | ||
+ | L.append('' | ||
+ | body = CRLF.join(L) | ||
+ | content_type = ' | ||
+ | return content_type, | ||
+ | |||
+ | def get_content_type(filename): | ||
+ | return mimetypes.guess_type(filename)[0] or ' | ||
+ | </ | ||
+ | === Fix encode_multipart_formdata for posting binary files === | ||
+ | <code python> | ||
+ | def encode_multipart_formdata(fields, | ||
+ | """ | ||
+ | fields is a sequence of (name, value) elements for regular form fields. | ||
+ | files is a sequence of (name, filename, value) elements for data to be uploaded as files | ||
+ | Return (content_type, | ||
+ | """ | ||
+ | | ||
+ | buf = StringIO() | ||
+ | boundary = mimetools.choose_boundary() | ||
+ | for (key, value) in fields: | ||
+ | buf.write(' | ||
+ | buf.write(' | ||
+ | buf.write(' | ||
+ | for (key, filename, value) in files: | ||
+ | contenttype = mimetypes.guess_type(filename)[0] or ' | ||
+ | buf.write(' | ||
+ | buf.write(' | ||
+ | buf.write(' | ||
+ | buf.write(' | ||
+ | buf.write(' | ||
+ | buf = buf.getvalue() | ||
+ | content_type = ' | ||
+ | return content_type, | ||
+ | </ | ||
+ | === Post using MultipartPostHandler === | ||
+ | * Install:< | ||
+ | pip install MultipartPostHandler | ||
+ | </ | ||
+ | * example:< | ||
+ | import MultipartPostHandler, | ||
+ | |||
+ | cookies = cookielib.CookieJar() | ||
+ | opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookies), | ||
+ | MultipartPostHandler.MultipartPostHandler) | ||
+ | params = { " | ||
+ | " | ||
+ | opener.open(" | ||
+ | </ | ||
+ | ===== twisted internet ===== | ||
+ | ==== Simple Request ==== | ||
+ | <code python> | ||
+ | from sys import argv | ||
+ | from pprint import pformat | ||
+ | |||
+ | from twisted.internet.task import react | ||
+ | from twisted.web.client import Agent, readBody | ||
+ | from twisted.web.http_headers import Headers | ||
+ | |||
+ | |||
+ | def cbRequest(response): | ||
+ | print ' | ||
+ | print ' | ||
+ | print ' | ||
+ | print ' | ||
+ | print pformat(list(response.headers.getAllRawHeaders())) | ||
+ | d = readBody(response) | ||
+ | d.addCallback(cbBody) | ||
+ | return d | ||
+ | |||
+ | def cbBody(body): | ||
+ | print ' | ||
+ | print body | ||
+ | |||
+ | def main(reactor, | ||
+ | agent = Agent(reactor) | ||
+ | d = agent.request( | ||
+ | ' | ||
+ | Headers({' | ||
+ | None) | ||
+ | d.addCallback(cbRequest) | ||
+ | return d | ||
+ | |||
+ | react(main, argv[1:]) | ||
+ | </ | ||
+ |
python/internet.1407297405.txt.gz · Last modified: 2022/10/29 16:15 (external edit)