Title:           [Python] Pastebin Parsing Script
Author:          PonyPornDispenser
Pastebin link:   http://pastebin.com/Fh375wji
First Edit:      Saturday 25th of July 2015 07:39:42 PM CDT
Last Edit:       Saturday 25th of July 2015 07:39:42 PM CDT

import urllib2
import re
import sys
import operator
def getmostcharacters(l1, l2):
    """Return up to three names from l1 with the highest counts in l2.

    l1 -- list of names.
    l2 -- list of counts, parallel to l1 (l2[i] belongs to l1[i]).

    Only names whose count is greater than zero are considered.  The result
    is ordered from highest to lowest count; names with equal counts keep
    their relative order from l1.  When no name has a positive count the
    list ["uncategorized"] is returned instead.
    """
    # Drop names that never occurred so they cannot pad out the top three.
    seen = [(name, count) for name, count in zip(l1, l2) if count > 0]
    # list.sort is stable, including with reverse=True, so ties stay in
    # l1 order.
    seen.sort(key=lambda pair: pair[1], reverse=True)
    highchars = [name for name, _ in seen[:3]]
    # Previously the result was only returned when a third name existed;
    # one or two matching names were wrongly reported as "uncategorized".
    if highchars:
        return highchars
    return ["uncategorized"]
def isnsfw(l1, l2):
    """Label a collection of words as "NSFW" or "SFW".

    l1 -- the words to inspect (anything offering a .count() method).
    l2 -- the flagged terms to look for.

    Returns "NSFW" as soon as one flagged term occurs at least once in l1,
    otherwise "SFW".
    """
    flagged_present = any(l1.count(term) > 0 for term in l2)
    return "NSFW" if flagged_present else "SFW"
# Script body: for every paste id listed in pasteids.txt (processed last line
# first), download the raw paste, rate it SFW/NSFW, pick the most mentioned
# characters and append one summary line to somanypastes.txt.

# NOTE(review): api_key is not referenced anywhere in the visible script --
# presumably a Pastebin API key; confirm it is still needed before keeping a
# credential in source.
api_key = "1bd4b82919f280d3e449cab6f169da33"
raw_url = "http://pastebin.com/raw.php?i="
url = "http://pastebin.com/"

characters = ["Celestia", "Luna", "Cadance", "Velvet", "Twilight", "Maud", "Pinkie", "Applejack", "Rainbow", "Fluttershy", "Nurse Redheart",
              "Sweetie Belle", "Scootaloo", "Apple Bloom", "Spike", "Babs Seed", "Granny", "Tiara", "Silver Spoon",
              "Sunset", "Nightmare", "Chrysalis", "Adagio", "Sonata", "Aria", "Trixie", "Gilda", "Fleur", "Sugar Belle",
              "Spitfire", "Aloe", "Lotus", "Coco Pommel", "Tree Hugger", "Cloudchaser", "Flitter", "Blossomforth", "Lightning Dust"]
nsfw = ["pussy", "balls", "cock", "dick", "penis", "semen", "cum", "vagina", "horsepussy", "dripping"]
charamounts = []

# Compiled once; strips everything that is not an ASCII letter or digit.
_NON_ALNUM = re.compile('[^0-9a-zA-Z]+')


def _normalize_word(word):
    """Return word lower-cased with all non-alphanumeric characters removed."""
    return _NON_ALNUM.sub('', word).lower()


def _count_phrase(words, phrase):
    """Count how often the word list phrase occurs as a consecutive run in words."""
    size = len(phrase)
    if size == 0:
        return 0
    if size == 1:
        return words.count(phrase[0])
    return sum(1 for k in range(len(words) - size + 1) if words[k:k + size] == phrase)


# Character names are normalized the same way as the paste text so that
# matching is case-insensitive; multi-word names become word sequences.
_character_parts = [[_normalize_word(part) for part in name.split()] for name in characters]

with open("pasteids.txt", "r") as f:
    frl = f.readlines()

print(len(frl))
# NOTE(review): megapaste.txt is opened (and therefore truncated) but nothing
# in the visible script writes to it; kept so the file side effect is unchanged.
with open("somanypastes.txt", "w") as b, open("megapaste.txt", "w") as massbin:
    try:
        for x, i in enumerate(frl[::-1]):
            print(x)  # progress counter
            paste_id = i.strip()
            if not paste_id:
                continue
            try:
                resp = urllib2.urlopen(urllib2.Request(raw_url + paste_id))
                try:
                    html = resp.read()
                finally:
                    resp.close()
            except urllib2.URLError as e:
                # HTTPError is a subclass of URLError. Unreachable pastes are
                # skipped on purpose; report them instead of hiding them.
                sys.stderr.write("skipping %s: %s\n" % (paste_id, e))
                continue
            # Split on any whitespace (not only spaces) and keep the
            # normalized words; the old loop threw the normalized value away.
            allwords = [w for w in (_normalize_word(t) for t in html.split()) if w]
            charamounts = [_count_phrase(allwords, parts) for parts in _character_parts]
            rating = isnsfw(allwords, nsfw)
            b.write(url + paste_id + " " + rating + " " + ", ".join(getmostcharacters(characters, charamounts)) + "\n")
    except KeyboardInterrupt:
        # The with-statement closes both output files while SystemExit
        # propagates.
        print("Exit.")
        sys.exit()