[Python] Activestate Deki Bütün Örnekleri İndirin
-
ActiveState'teki bütün örnek Python kodlarını indirmek için bir script buldum arkadaşlar.
Not: BeautifulSoup 4 kurulu olması gerekir, ayık olun.
Toplamda 20 MB'lık bir sürü kod parçası .py uzantılı olarak iniyor, baya bir örnek var. Klasör içine koyup çalıştırın.
"""Download the Python recipes listed on code.activestate.com.

Run the script from inside a dedicated folder (e.g. ``Desktop/ActiveState/``):
every recipe is saved as ``<recipe-name>.py`` in the current working
directory.

The listing pages are parsed with BeautifulSoup 4 (``bs4``), which must be
installed.  The module names are resolved for both Python 3 and Python 2.
"""

import os.path as op
import re
import sys
import threading
import time

try:  # Python 3 module names
    import queue as que
    import urllib.parse as up
    import urllib.request as ul
except ImportError:  # Python 2 module names
    import Queue as que
    import urllib as ul
    import urlparse as up

try:
    import bs4 as bs
except ImportError:
    # Reported by _require_bs4() at the point where parsing is needed, so the
    # download worker stays usable without the parser installed.
    bs = None

# A recipe link is recognised by a run of 4-7 digits (the recipe id) in its path.
pat = re.compile(r'.*[\d]{4,7}.*')

# Number of recipes downloaded so far; shared by all worker threads.
count = 0
_count_lock = threading.Lock()


def _emit(text, end="\n"):
    """Write *text* followed by *end* to stdout in a single call."""
    sys.stdout.write(text + end)


def _require_bs4():
    """Raise ImportError with a clear message when bs4 is not installed."""
    if bs is None:
        raise ImportError(
            "BeautifulSoup 4 (the 'bs4' package) is required to parse the "
            "recipe listing pages")


class dldfile(threading.Thread):
    """Worker thread that downloads one ``(url, filename)`` job from a queue.

    Attributes:
        qu1: queue the job is taken from; ``task_done()`` is called on it
            exactly once per job, whether the download succeeded or not.
        ad: suffix appended to the recipe URL to reach the raw source.
            Set it to ``''`` to download the whole web page instead.
        max_attempts: how many times a failing download is tried before the
            worker gives up.
    """

    max_attempts = 3

    def __init__(self, qu1):
        threading.Thread.__init__(self)
        self.qu1 = qu1
        self.ad = 'download/1/'

    def run(self):
        """Take one job from the queue and download it, retrying on failure."""
        global count
        url, filename = self.qu1.get()
        try:
            # Build the target once: every retry must hit the same address,
            # so the suffix is never appended a second time.
            target = url + self.ad
            for attempt in range(1, self.max_attempts + 1):
                try:
                    ul.urlretrieve(target, filename)
                except Exception as err:  # thread boundary: report, never hang
                    if attempt < self.max_attempts:
                        _emit(" RE-TRYING ", end=" ")
                    else:
                        _emit("giving up on " + target + ": " + str(err))
                else:
                    with _count_lock:
                        count += 1
                        finished = count
                    _emit(str(finished) + "(" + str(threading.active_count())
                          + ") " + filename)
                    break
        finally:
            # Always release the job so that a join() on the queue returns.
            self.qu1.task_done()


class dload(threading.Thread):
    """Thread that processes one listing page and downloads its recipes.

    Attributes:
        qu: queue the page number is taken from.
        url: listing URL prefix; the page number is appended to it.
        q: internal queue of ``(recipe_url, filename)`` jobs for this page.
    """

    def __init__(self, qu, url="http://code.activestate.com/recipes/langs/python/?page="):
        threading.Thread.__init__(self)
        self.url = url
        self.q = que.Queue()
        self.qu = qu

    def run(self):
        """Take one page number, queue the page's recipes and download them."""
        ind = self.qu.get()
        try:
            url = self.url + str(ind)
            label = url.split('/')[-1]
            _emit('started with the  ' + label, end=" ")
            self._queue_recipes(url)
            self.fetch_data()
            time.sleep(1)  # short pause between listing pages
            self.q.join()
            _emit('done with the  ' + label, end=" ")
        finally:
            # Release the page even when fetching or parsing failed; otherwise
            # the join() on the page queue in the main thread never returns.
            self.qu.task_done()

    def _queue_recipes(self, url):
        """Parse the listing page at *url* and queue one job per recipe link."""
        _require_bs4()
        page = ul.urlopen(url)
        try:
            markup = page.read()
        finally:
            page.close()
        soup = bs.BeautifulSoup(markup, "html.parser")
        bu = up.urlsplit(self.url)
        for title in soup.find_all(attrs={"class": "recipe-title"}):
            if title.a is None:
                continue
            href = title.a.get('href')
            if not href:
                continue
            path = up.urlsplit(href).path
            if not pat.search(path):
                continue
            if not path.endswith('/'):
                path += '/'  # the download suffix is appended directly after it
            name = path.rstrip('/').split('/')[-1]
            # The recipe is stored in the current working directory.  Use
            # name + '.html' instead when downloading the whole web page.
            filename = op.join(op.abspath(op.curdir), name + '.py')
            recipe_url = bu.scheme + '://' + bu.netloc + path
            _emit(recipe_url)
            self.q.put((recipe_url, filename))

    def fetch_data(self):
        """Download everything queued in ``self.q``, at most 10 jobs at a time."""
        Que1 = que.Queue()
        minitask = 10
        while True:
            # Drain without blocking: a page may hold fewer jobs than a batch.
            batch = []
            for _ in range(minitask):
                try:
                    batch.append(self.q.get_nowait())
                except que.Empty:
                    break
            if not batch:
                break
            for job in batch:
                Que1.put(job)
            for _ in batch:  # exactly one worker per job
                worker = dldfile(Que1)
                worker.daemon = True
                worker.start()
            Que1.join()
            for _ in batch:
                self.q.task_done()  # lets self.q.join() in run() return


def main():
    """Walk through the listing pages, five pages (threads) at a time."""
    _require_bs4()
    task = 5
    Que = que.Queue()
    # no. of pages included under the python tag. 188 is current count and
    # 3700+ python recipes
    for k in range(1, 190, task):
        _emit("\n PAGE # : {0} \t \nDeploying Fresh threads\n".format(k))
        for _ in range(task):
            dload(Que).start()
        for j in range(task):
            Que.put(k + j)
        Que.join()
        _emit("DONE\n")
        time.sleep(2)
    _emit("Our buisness finished")


if __name__ == '__main__':
    main()
-
up olsun. millet görsün.
-
up olsunn
-
sen mi yazdın?
-
Hayır hocam yabancı bir forumdan aldım alıntı yazmayı unutmuşum
-
Türkler python ile regular expression ve thread'i nadiren kullanıyor, o yüzden sordum :)
-
Çok iyi bu.
Toplam Hit: 1146 Toplam Mesaj: 7