2022-02-02 16:08:10 +01:00
|
|
|
# German Google Fonts pages
|
|
|
|
|
|
|
|
A spider that looks for German pages that load Google Fonts hosted on Google's servers.
|
|
|
|
|
2022-02-02 19:38:22 +01:00
|
|
|
Based on: https://docs.scrapy.org/en/latest/intro/tutorial.html
|
2022-02-02 16:08:10 +01:00
|
|
|
|
2022-02-02 19:38:22 +01:00
|
|
|
## Usage
|
|
|
|
|
2022-02-06 19:09:10 +01:00
|
|
|
pip3 install -e .
|
2022-02-02 20:07:56 +01:00
|
|
|
scrapy startproject ger_gfonts
|
2022-02-02 19:59:08 +01:00
|
|
|
cd ger_gfonts
|
2022-02-02 19:38:22 +01:00
|
|
|
scrapy crawl gfonts -O gfonts.json
|
2022-02-02 16:08:10 +01:00
|
|
|
|
|
|
|
## TODO
|
|
|
|
|
2022-02-06 19:09:10 +01:00
|
|
|
!Implement a crawling spider: https://docs.scrapy.org/en/latest/topics/spiders.html#crawlspider
|
|
|
|
|
2022-02-02 19:38:22 +01:00
|
|
|
Start checking for Google Analytics on all EU websites.
|
2022-02-02 16:08:10 +01:00
|
|
|
|
2022-02-06 19:09:10 +01:00
|
|
|
- EU country TLDs: https://www.whois365.com/en/listtld/europe
|
|
|
|
|
|
|
|
### Meta Pixel
|
|
|
|
|
|
|
|
<!-- Meta Pixel Code -->
|
|
|
|
<script>
|
|
|
|
!function(f,b,e,v,n,t,s)
|
|
|
|
{if(f.fbq)return;n=f.fbq=function(){n.callMethod?
|
|
|
|
n.callMethod.apply(n,arguments):n.queue.push(arguments)};
|
|
|
|
if(!f._fbq)f._fbq=n;n.push=n;n.loaded=!0;n.version='2.0';
|
|
|
|
n.queue=[];t=b.createElement(e);t.async=!0;
|
|
|
|
t.src=v;s=b.getElementsByTagName(e)[0];
|
|
|
|
s.parentNode.insertBefore(t,s)}(window, document,'script',
|
|
|
|
'https://connect.facebook.net/en_US/fbevents.js');
|
|
|
|
fbq('init', '898263220867925');
|
|
|
|
fbq('track', 'PageView');
|
|
|
|
</script>
|
|
|
|
<noscript><img height="1" width="1" style="display:none"
|
|
|
|
src="https://www.facebook.com/tr?id=898263220867925&ev=PageView&noscript=1"
|
|
|
|
/></noscript>
|
|
|
|
<!-- End Meta Pixel Code -->
|
|
|
|
|
2022-02-02 16:08:10 +01:00
|
|
|
## IDEAS
|
|
|
|
|
|
|
|
Make it into a browser extension that would notify you.
|
2022-02-02 19:38:22 +01:00
|
|
|
|
|
|
|
## Checking website origin:
|
|
|
|
|
|
|
|
https://ipinfo.io/
|
|
|
|
|