# The Web Robots Pages - https://www.robotstxt.org/
# Web Robots (also known as Web Wanderers, Crawlers, or Spiders) are programs
# that traverse the Web automatically. Search engines such as Google use them
# to index the web content, spammers use them to scan for email addresses,
# and they have many other uses.
#
# Reads a list of site URLs (one per line) from C:\urls.txt and downloads each
# site's /robots.txt to C:\<sitename>.txt via BITS.

Get-Content -Path 'C:\urls.txt' | ForEach-Object {
    # Strip the scheme/www prefix: 'https://www.example.com' -> 'example.com'.
    # Anchored -replace (case-insensitive) instead of the string .replace()
    # method, so a URL merely *containing* the substring is not mangled.
    $name = $_ -replace '^https://www\.', ''

    # First lowercase label before a dot becomes the output file name,
    # e.g. 'example.com' -> 'example'. Guard the match: $Matches keeps stale
    # values from the previous iteration if -match fails, which would
    # silently overwrite the wrong file.
    if ($name -notmatch '([a-z]+)\.') {
        Write-Warning "Could not derive a file name from '$_' - skipping."
        return  # 'return' inside ForEach-Object skips to the next item
    }
    $file = $Matches[1]

    $url         = $_ + '/robots.txt'
    $destination = "C:\$file.txt"

    try {
        # -ErrorAction Stop makes BITS failures catchable below.
        Start-BitsTransfer -Source $url -Destination $destination -ErrorAction Stop
    }
    catch {
        # A missing robots.txt (404) or a network error on one site
        # should not abort the rest of the run.
        Write-Warning "Failed to download '$url': $_"
    }
}