Script stops when a source URL does not return status 200
I have a script (see below) that checks, more or less, link positions on a website. It works fine, but as soon as a source URL on which the link lies does not return a 200 response, the script quits. I just want it to skip ahead to the next URL, or at least print an "error" message, or better yet, report the HTTP status code. I need a quick solution; if anyone can help, that would be super awesome :)
URLs.csv = list of websites that should contain a link to a certain page
domain.com = the domain to check for; if a link to it is present, the script reports roughly where on the page it is located.
import csv
from lxml import html

with open('URLs.csv', 'r') as csvfile:
    urls = [row[0] for row in csv.reader(csvfile)]

for url in urls:
    print url
    doc = html.parse(url)
    if doc.xpath('//a[contains(@href,"domain.com")]'):
        for anchor_node in doc.xpath('//a[contains(@href,"domain.com")]'):
            if anchor_node.xpath('./ancestor::div[contains(@class, "sidebar")]'):
                print 'Sidebar'
            elif anchor_node.xpath('./parent::div[contains(@class, "widget")]'):
                print 'Sidebar'
            elif anchor_node.xpath('./ancestor::div[contains(@id, "sidebar")]'):
                print 'Sidebar'
            elif anchor_node.xpath('./ancestor::div[contains(@class, "comment")]'):
                print 'Kommentar'
            elif anchor_node.xpath('./ancestor::div[contains(@id, "comment")]'):
                print 'Kommentar'
            elif anchor_node.xpath('./ancestor::div[contains(@class, "foot")]'):
                print "Footer"
            elif anchor_node.xpath('./ancestor::div[contains(@id, "foot")]'):
                print "Footer"
            elif anchor_node.xpath('./ancestor::div[contains(@class, "post")]'):
                print "Contextual"
            else:
                print 'Unidentified Link'
    else:
        print 'Link is Dead'
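One way to get the behaviour described above is to fetch each page yourself before parsing, so you can inspect the HTTP status code and skip (or report) failures instead of letting html.parse() raise and kill the loop. A minimal sketch, assuming the third-party requests library is installed (any HTTP client that exposes the status code would work just as well):

import csv
import requests
from lxml import html

with open('URLs.csv', 'r') as csvfile:
    urls = [row[0] for row in csv.reader(csvfile)]

for url in urls:
    print url
    try:
        response = requests.get(url)
    except requests.RequestException as e:
        # Network-level failure (DNS error, timeout, connection refused, ...)
        print 'Error: %s' % e
        continue
    if response.status_code != 200:
        # Report the HTTP status code and move on to the next URL
        print 'Error: HTTP %d' % response.status_code
        continue
    doc = html.fromstring(response.content)
    # ... the existing XPath checks on doc go here, unchanged ...

html.fromstring() parses the body that was already downloaded, and the resulting doc supports the same xpath() calls as the doc returned by html.parse(), so the rest of the script does not need to change.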