A simple script to scrape pastebin at random intervals.

Check here for an up-to-date version of this script.

import bs4, requests, random, time, csv, string
from fake_useragent import UserAgent
from random import randint
from time import sleep

def random_string(stringLength=8):
    """Return a random string made of ASCII letters and digits.

    stringLength is the number of characters to generate (default 8).
    """
    alphabet = string.ascii_letters + string.digits
    picked = []
    # One random.choice call per output character.
    for _ in range(stringLength):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

# Prefix of the raw-paste address; a random 8-character ID is appended to it.
base_url = "https://pastebin.com/raw/"
# How many candidate URLs to generate.
url_count = 50

print("Initializing url list")
print("Creating url list")
# Call random_string() once per entry so every URL carries its own random ID.
# Building the URL a single time and appending it repeatedly would fill the
# list with 50 copies of the same address.
urllist = [base_url + random_string(8) for _ in range(url_count)]

## initializing the UserAgent object
user_agent = UserAgent()

print("Starting...")
## Request every URL in urllist, sleeping a random 1-30 seconds after each
## request, and append each page whose <title> is not the "Page Removed"
## notice to Page.txt.
for url in urllist:

    ## getting the response from the page using the get method of the
    ## requests module; the timeout keeps a stalled connection from
    ## blocking the whole run
    print("Loading fake user agent")
    try:
        page = requests.get(
            url, headers={"user-agent": user_agent.chrome}, timeout=30
        )
    except requests.RequestException as error:
        ## a single failed request should not abort the remaining URLs
        print("Request failed: " + str(error))
        page = None
    print("Sleeping random interval")
    sleep(randint(1, 30))
    if page is None:
        continue

    ## storing the content of the page in a variable
    html = page.content

    ## creating BeautifulSoup object
    soup = bs4.BeautifulSoup(html, "html.parser")
    ## str() of the result is "None" when the page has no <title> tag
    title = str(soup.find('title'))
    print(title)
    if title != "<title>Pastebin.com - Page Removed</title>":
        goodpage = soup.prettify()
        ## the with-block closes the file even if the write raises; an
        ## explicit UTF-8 encoding avoids encode errors on platforms whose
        ## default encoding cannot represent every character in the page
        with open("Page.txt", "a", encoding="utf-8") as f:
            f.write(goodpage)
        print("Wrote new page to file")