#
# This is a quick and dirty script to pull the most likely url and description
# for a list of terms. Here's how you use it:
#
#   python term2url.py < {a txt file with a list of terms} > {a tab delimited file of results}
#
# Input is one search term per line on stdin. Output is one line per term that
# produced a hit: the term, the URL of the first result and its description,
# separated by tabs. Terms that fail or return no results are reported on
# stderr and skipped, so stdout only ever contains result rows.
#
# The script targets Python 2 (it relies on urllib2). The simplejson module is
# used when it is installed; otherwise the standard library json module is used.
#
# NOTE(review): the Google AJAX Search endpoint used below is reported to be
# deprecated -- confirm it still responds before relying on this script.
#
import contextlib
import sys
import urllib
import urllib2

try:
    import simplejson
except ImportError:
    # The stdlib parser exposes the same loads() call used below.
    import json as simplejson

# Google Search API endpoint; the url-encoded query string is appended.
SEARCH_ENDPOINT = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&%s"


def fetch_results(term):
    """Query the search API for *term* and return its list of result records.

    Returns an empty list when the reply carries no usable ``responseData``
    or no ``results``.

    Raises:
        urllib2.URLError: the request could not be completed (HTTPError is a
            subclass, so HTTP error statuses are included).
        ValueError: the response body is not valid JSON.
    """
    query = urllib.urlencode({'q': term})
    request_url = SEARCH_ENDPOINT % query

    # urllib2 responses are not context managers on Python 2; closing()
    # guarantees the connection is released even if read() or parsing fails.
    with contextlib.closing(urllib2.urlopen(request_url)) as response:
        payload = simplejson.loads(response.read())

    # NOTE(review): the API appears to send "responseData": null together with
    # an error status when it rejects a query -- treat that as "no results"
    # instead of failing on the subscript.
    data = payload.get('responseData') if isinstance(payload, dict) else None
    if not data:
        return []
    return data.get('results') or []


def to_field(value):
    """Return *value* as single-line ASCII text safe to place in a TSV column.

    Non-ASCII characters are replaced with '?', and every run of whitespace
    (including tabs and newlines) is collapsed to one space so a field can
    never split the output row.
    """
    ascii_text = value.encode('ascii', 'replace')
    return " ".join(ascii_text.split())


def main():
    """Read terms from stdin and print one tab-delimited row per best match."""
    for line in sys.stdin:
        # Drop the line ending only (LF or CRLF); inner spacing is part of
        # the term and is kept as typed.
        term = line.rstrip("\r\n")
        if not term.strip():
            # A blank line is not a query; skip it rather than ask the API.
            continue

        try:
            results = fetch_results(term)
        except (urllib2.URLError, ValueError) as err:
            # One failed lookup must not lose the rest of the batch.
            sys.stderr.write("%s: lookup failed: %s\n" % (term, err))
            continue

        if not results:
            sys.stderr.write("%s: no results\n" % term)
            continue

        # The first record is the best match.
        best = results[0]

        # Print the result to stdout. Use redirect to capture the output.
        print("%s\t%s\t%s" % (term, to_field(best['url']), to_field(best['content'])))


if __name__ == "__main__":
    main()