Python built-in library for TCP sockets
import socket
# Create an IPv4 TCP (stream) socket and connect it to port 80 of the host.
server_address = ('www.py4inf.com', 80)
mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
mysock.connect(server_address)
Application:
Write a web browser:
An HTTP request in Python:
import socket
mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
mysock.connect(('www.py4inf.com', 80))
mysock.send('GET http://www.py4inf.com/code/romeo.txt HTTP/1.0\n\n')
while True:
data = mysock.recv(512)
if ( len(data) < 1 ) :
break
print data
mysock.close()
urllib:
import urllib
fhand = urllib.urlopen('http://www.py4inf.com/code/romeo.txt')
for line in fhand:
print line.strip()
<HTML>
Use BeautifulSoup to parse HTML.
Download BeautifulSoup.py and put it in the same folder as your Python code.
http://www.crummy.com/software/BeautifulSoup/
import urllib
from BeautifulSoup import *
url = raw_input("Enter - ")
html = urllib.urlopen(url).read()
soup = BeautifulSoup(html)
# Retrieve a list of anchor tags
# Each tag is like a dictionary of HTML Attributes
tags = soup('a')
for tag in tags:
print tag.get('href', None)
<XML>
Use xml.etree.ElementTree to parse XML.
import urllib
import xml.etree.ElementTree as ET
serviceurl = 'http://maps.googleapis.com/maps/api/geocode/xml?'
while True:
address = raw_input('Enter location: ')
if len(address) < 1 : break
url = serviceurl + urllib.urlencode({'sensor':'false', 'address': address})
print 'Retrieving', url
uh = urllib.urlopen(url)
data = uh.read()
print 'Retrieved',len(data),'characters'
print data
tree = ET.fromstring(data)
results = tree.findall('result')
lat = results[0].find('geometry').find('location').find('lat').text
lng = results[0].find('geometry').find('location').find('lng').text
location = results[0].find('formatted_address').text
print 'lat',lat,'lng',lng
print location
<JSON>
import json
# Parse the JSON text held in `data` into the corresponding Python object.
info = json.loads(data)
After json.loads, info is a Python dictionary (when the JSON document is an object),
so you can use the usual dictionary operations to get the value associated with a key.
A JSON list, e.g. [{"key1":"v1"}, {"key2":"v2"}],
becomes a Python list after json.loads.
JSON vs. XML
JSON is easier to use, but XML is more expressive.
js = json.loads(data)
# dumps() with indent=4 returns the JSON re-serialized as a string with
# 4-space indentation; print the returned string to get readable output.
json.dumps(js, indent=4)
<WEB Service Technology>
REST - Representational State Transfer - remote resources that we create, read, update and delete over the network.
import json
import urllib
url = raw_input("Enter json location: ")
print "Retrieving ", url
urljson = urllib.urlopen(url)
data = urljson.read()
info = json.loads(data)
print 'Retrieved ', len(data), " characters"
print "Count ", len(info["comments"])
#print json.dumps(info, indent=4)
sumofcount = sum([item["count"] for item in info["comments"]])
print "Sum ", sumofcount
#for item in info["comments"]:
# sumofcount += item["count"]
#print "Sum ", sumofcount
#print type(info["comments"]["count"])
#print "Sum ", sum(info["comments"]["count"])
import urllib
import twurl
import json
TWITTER_URL = 'https://api.twitter.com/1.1/friends/list.json'
while True:
print ''
acct = raw_input('Enter Twitter Account:')
if ( len(acct) < 1 ) : break
url = twurl.augment(TWITTER_URL,
{'screen_name': acct, 'count': '5'} )
print 'Retrieving', url
connection = urllib.urlopen(url)
data = connection.read()
headers = connection.info().dict
print 'Remaining', headers['x-rate-limit-remaining']
js = json.loads(data)
print json.dumps(js, indent=4)
for u in js['users'] :
print u['screen_name']
s = u['status']['text']
print ' ',s[:50]





No comments:
Post a Comment