#!/usr/bin/env python
'''Print the URLs found in standard input, one per line.

A URL is only recognised when it sits between quote characters (" or ')
and starts with http://

Modified from Python Recipe 302700 by Yuriy Tkachenko:

    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/302700
'''

__RCS__ = '$Id: url_scrape.py,v 1.4 2005/06/19 18:18:34 darren Exp darren $'
__version__ = '$Revision: 1.4 $'
__initialdate__ = 'June 2005'
__author__ = 'Darren Paul Griffith, http://www.madphilosopher.ca/'

import re
import sys

# Pattern for fully-qualified, quoted URLs.
# Quirks of this pattern, kept as-is so matching behaviour does not change:
#   * the URL body may not contain '+': a '+' before the closing quote
#     makes that candidate fail to match;
#   * the opening and closing quote characters need not be the same;
#   * only the http:// scheme is recognised (https:// is not).
URL_PATTERN = re.compile(r'''["']http://[^+]*?['"]''')


def find_urls(text):
    '''Return the quoted http:// URLs found in *text*, in order of appearance.

    The surrounding quote characters are removed from each result.
    Duplicates are kept; an input without matches yields an empty list.
    '''
    # The lazy match stops at the first quote character, so every match is
    # exactly one opening quote, a quote-free body and one closing quote.
    return [quoted[1:-1] for quoted in URL_PATTERN.findall(text)]


def main():
    '''Read all of standard input and print each URL on its own line.'''
    for url in find_urls(sys.stdin.read()):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(url)


if __name__ == '__main__':
    main()