18 lines
515 B
Python
18 lines
515 B
Python
|
import scrapy
|
||
|
#import pandas
|
||
|
from scrapy.linkextractors import LinkExtractor
|
||
|
#from utility.countries import getEuTlds
|
||
|
from utility import countries
|
||
|
|
||
|
class firstSpider(scrapy.Spider):
    """Spider seeded with one Google ``inurl:`` search URL per EU TLD.

    The TLDs are the keys of whatever ``countries.getEuTlds()`` returns.
    Every link found on each downloaded search page is printed to stdout.
    """

    name = "start_urls"

    def __init__(self, *args, **kwargs):
        """Initialise the base spider and build ``start_urls``.

        Args:
            *args: Forwarded unchanged to ``scrapy.Spider.__init__``.
            **kwargs: Forwarded unchanged to ``scrapy.Spider.__init__``;
                accepting them keeps the spider constructible when it is
                given spider arguments.
        """
        # Two-argument super() form: nothing else in the file requires
        # Python 3 syntax, so stay compatible with both major versions.
        super(firstSpider, self).__init__(*args, **kwargs)

        eu_tlds = countries.getEuTlds()
        # Materialise a real list. On Python 3 ``map`` returns a one-shot
        # iterator, which would leave ``start_urls`` empty after its first
        # traversal and unusable with ``len()`` or indexing.
        self.start_urls = [
            'https://www.google.com/search?q=inurl%3A' + tld
            for tld in eu_tlds.keys()
        ]

    def parse(self, response):
        """Print every link extracted from ``response``.

        Args:
            response: The downloaded page handed over by Scrapy.

        Returns:
            None. No items or follow-up requests are produced.
        """
        xlink = LinkExtractor()
        for link in xlink.extract_links(response):
            print(link)
|