西瓜科学家的笔记本: Web

Python - Web

Python built-in library for TCP sockets
import socket

# Create an IPv4 (AF_INET) stream (SOCK_STREAM) socket, i.e. a TCP socket.
mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# Open the connection to port 80 of the host.
mysock.connect(('www.py4inf.com', 80))

Application:

Write a web browser:
An HTTP request in Python:

import socket
import sys

# Fetch a text file over a raw TCP socket and copy the server's reply
# (headers and body) to standard output.
mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    mysock.connect(('www.py4inf.com', 80))

    # HTTP lines are terminated by CRLF; the empty line (second CRLF)
    # marks the end of the request headers.
    mysock.send('GET http://www.py4inf.com/code/romeo.txt HTTP/1.0\r\n\r\n')

    # Read the reply in chunks of up to 512 bytes.
    while True:
        data = mysock.recv(512)
        # recv() returns an empty string once the server has closed
        # the connection, which ends the loop.
        if len(data) < 1:
            break
        # Write the chunk verbatim: a print statement would append a newline
        # after every chunk and split lines in the middle of the text.
        sys.stdout.write(data)
finally:
    # Release the socket even if connect/send/recv raised.
    mysock.close()

urllib:

import urllib

# urlopen returns a handle that is iterated below line by line, like a file.
fhand = urllib.urlopen('http://www.py4inf.com/code/romeo.txt')

for line in fhand:
    # strip() drops the surrounding whitespace (including the line's own
    # newline) so that print does not double-space the output.
    print line.strip()

<HTML>
Use BeautifulSoup to parse HTML.
Download BeautifulSoup.py and put it in the same directory as your Python code.
http://www.crummy.com/software/BeautifulSoup/

import urllib
from BeautifulSoup import *

# Ask the user for the address of the page to scan.
url = raw_input("Enter - ")

# Download the whole page and hand the markup to BeautifulSoup for parsing.
html = urllib.urlopen(url).read()
soup = BeautifulSoup(html)

# Retrieve a list of anchor tags
# Each tag is like a dictionary of HTML Attributes

tags = soup('a')

for tag in tags:
    # get() falls back to None when the anchor has no href attribute,
    # so such anchors are printed as "None".
    print tag.get('href', None)

<XML>

Use xml.etree.ElementTree

import urllib
import xml.etree.ElementTree as ET

serviceurl = 'http://maps.googleapis.com/maps/api/geocode/xml?'

while True:
    address = raw_input('Enter location: ')
    if len(address) < 1 : break

    url = serviceurl + urllib.urlencode({'sensor':'false', 'address': address})
    print 'Retrieving', url
    uh = urllib.urlopen(url)
    data = uh.read()
    print 'Retrieved',len(data),'characters'
    print data
    tree = ET.fromstring(data)


    results = tree.findall('result')
    lat = results[0].find('geometry').find('location').find('lat').text
    lng = results[0].find('geometry').find('location').find('lng').text
    location = results[0].find('formatted_address').text

    print 'lat',lat,'lng',lng
    print location

<JSON>
import json

# Parse the JSON text held in data into the corresponding Python object.
info = json.loads(data)
Here info is a dictionary in Python,
so the usual dictionary methods can be used to access the value associated with a key.

A JSON list looks like this -> [{"key1":"v1"}, {"key2":"v2"}]
After json.loads, it is a list (of dictionaries) in Python.

JSON vs. XML
JSON is easier to use, but XML is more expressive.
# Parse the JSON text, then serialise it again with 4-space indentation.
js = json.loads(data)
json.dumps(js, indent=4) # good formatting

<WEB Service Technology>
REST - Representational State Transfer - resources on a remote server which we create, read, update and delete over the network.

import json
import urllib

# Ask the user where the JSON document lives.
url = raw_input("Enter json location: ")

print "Retrieving ", url

# Download the document and parse it into Python objects.
urljson = urllib.urlopen(url)
data = urljson.read()
info = json.loads(data)
print 'Retrieved ', len(data), " characters"

# info["comments"] is iterated below and each of its items is indexed by "count".
print "Count ", len(info["comments"])

#print json.dumps(info, indent=4)
# Add up the "count" value of every item under "comments".
sumofcount = sum([item["count"] for item in info["comments"]])
print "Sum ", sumofcount
# Loop-based version of the same sum, left commented out:
#for item in info["comments"]:
#    sumofcount += item["count"]

#print "Sum ", sumofcount
#print type(info["comments"]["count"])
#print "Sum ", sum(info["comments"]["count"])

import urllib
import twurl
import json

TWITTER_URL = 'https://api.twitter.com/1.1/friends/list.json'

while True:
    print ''
    acct = raw_input('Enter Twitter Account:')
    if ( len(acct) < 1 ) : break
    url = twurl.augment(TWITTER_URL,
        {'screen_name': acct, 'count': '5'} )
    print 'Retrieving', url
    connection = urllib.urlopen(url)
    data = connection.read()
    headers = connection.info().dict
    print 'Remaining', headers['x-rate-limit-remaining']
    js = json.loads(data)
    print json.dumps(js, indent=4)

    for u in js['users'] :
        print u['screen_name']
        s = u['status']['text']
        print '  ',s[:50]

西瓜科学家的笔记本

Tuesday, November 10, 2015

Web - Python

No comments:

Post a Comment

Blog Archive