📚 The CoCalc Library - books, templates and other resources
License: OTHER
# Collect, for a range of thresholds, how many GitHub repositories have at
# least that many stars / forks.
#
# Results are exposed as module-level arrays:
#   stars_to_explore, repo_with_stars
#   forks_to_explore, repo_with_forks
#
# If `requests` or `bs4` is unavailable, previously saved CSV files under
# data/ are loaded instead of scraping.

# NumPy is required by BOTH branches below, so import it outside the `try`;
# otherwise a missing NumPy would surface as a NameError in the fallback.
import numpy as np

try:
    from requests import get
    from bs4 import BeautifulSoup

    URL = "https://github.com/search"

    def _count_repos(query):
        """Return the repository count the GitHub search page shows for *query*.

        The count is parsed from the <h3> inside the element with class
        "sort-bar". When that element is missing (`find` returns None and the
        attribute access raises AttributeError), fall back to counting the
        result entries displayed on the page.
        """
        r = get(URL, params={"q": query})
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(r.text, "html.parser")
        try:
            # hopefully the github search results page plays nicely.
            h3 = soup.find(class_="sort-bar").find("h3").text
            return int(h3.split(" ")[2].replace(",", ""))
        except AttributeError:
            # there might be less than 10 repos, so count the displayed results
            return len(soup.findAll(class_="mega-icon-public-repo"))

    def _scrape(kind, thresholds, counts):
        """Fill *counts* in place with the repo count for each threshold.

        *kind* is the search qualifier, "stars" or "forks".
        """
        for i, threshold in enumerate(thresholds):
            value = _count_repos("%s:>=%d" % (kind, threshold))
            counts[i] = value
            print("number of repos with greater than or equal to %d %s: %d"
                  % (threshold, kind, value))

    # Thresholds 0, 1, 2, 4, ..., 2**15. A float base is used because NumPy
    # rejects integer arrays raised to negative integer powers; 2.0**-1 == 0.5
    # truncates to 0 in the integer cast.
    stars_to_explore = (2.0 ** np.arange(-1, 16)).astype("int")
    forks_to_explore = (2.0 ** np.arange(-1, 16)).astype("int")
    repo_with_stars = np.ones_like(stars_to_explore)
    repo_with_forks = np.ones_like(forks_to_explore)

    print("Scrapping data from Github. Sorry Github...")
    print("The data is contained in variables `foo_to_explore` and `repo_with_foo`")

    print("stars first...")
    _scrape("stars", stars_to_explore, repo_with_stars)

    print("forks second...")
    # Iterate the fork thresholds (the original looped over the star ones).
    _scrape("forks", forks_to_explore, repo_with_forks)

    # Write two comma-separated columns (threshold, count). column_stack is
    # needed because the inputs are 1-D (there is no axis 1 to concatenate
    # along), and the comma delimiter matches what the fallback reader expects.
    np.savetxt("data/gh_forks.csv",
               np.column_stack([forks_to_explore, repo_with_forks]),
               fmt="%d", delimiter=",")
    np.savetxt("data/gh_stars.csv",
               np.column_stack([stars_to_explore, repo_with_stars]),
               fmt="%d", delimiter=",")

except ImportError as e:
    print(e)
    print("requests / BeautifulSoup not found. Using data pulled on Feburary 11, 2013")
    # Column 0 holds the thresholds, column 1 the repository counts.
    _data = np.genfromtxt("data/gh_forks.csv", delimiter=",")
    forks_to_explore = _data[:, 0]
    repo_with_forks = _data[:, 1]

    _data = np.genfromtxt("data/gh_stars.csv", delimiter=",")
    stars_to_explore = _data[:, 0]
    repo_with_stars = _data[:, 1]