Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download

📚 The CoCalc Library - books, templates and other resources

132977 views
License: OTHER
1
2
# Collect, for a doubling range of thresholds, how many GitHub repositories
# have at least that many stars / forks.  The results are exposed as the
# module-level arrays `stars_to_explore` / `repo_with_stars` and
# `forks_to_explore` / `repo_with_forks`.  If `requests` or `bs4` cannot be
# imported, previously saved CSV files under data/ are loaded instead.

# numpy is needed by both the scraping path and the fallback path, so it is
# imported outside the try block.
import numpy as np

try:
    from requests import get
    from bs4 import BeautifulSoup

    # Thresholds 0, 1, 2, 4, ..., 32768.  The base is a float because NumPy
    # refuses to raise an integer to a negative integer power; 2.0**-1 == 0.5
    # is truncated to 0 by the integer cast.
    stars_to_explore = (2.0 ** np.arange(-1, 16)).astype("int")
    forks_to_explore = (2.0 ** np.arange(-1, 16)).astype("int")
    repo_with_stars = np.ones_like(stars_to_explore)
    repo_with_forks = np.ones_like(forks_to_explore)

    URL = "https://github.com/search"

    def _count_repos(qualifier, threshold):
        """Return the repository count the search page reports for `qualifier:>=threshold`.

        `qualifier` is the search keyword ("stars" or "forks") and
        `threshold` the integer lower bound placed in the query string.
        """
        r = get(URL, params={"q": "%s:>=%d" % (qualifier, threshold)})
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(r.text, "html.parser")
        try:
            # Hopefully the github search results page plays nicely: the
            # third space-separated token of the sort-bar heading is parsed
            # as the total (thousands separators stripped).
            h3 = soup.find(class_="sort-bar").find("h3").text
            return int(h3.split(" ")[2].replace(",", ""))
        except AttributeError:
            # No sort-bar heading was found.  There might be less than 10
            # repos, so count the number of displayed results instead.
            return len(soup.findAll(class_="mega-icon-public-repo"))

    print("Scrapping data from Github. Sorry Github...")
    print("The data is contained in variables `foo_to_explore` and `repo_with_foo`")
    print("")
    print("stars first...")
    for i, _star in enumerate(stars_to_explore):
        value = _count_repos("stars", _star)
        repo_with_stars[i] = value
        print("number of repos with greater than or equal to %d stars: %d" % (_star, value))

    print("")
    print("forks second...")
    # Iterate the fork thresholds (the original looped over the star
    # thresholds here, which only worked because both arrays are equal).
    for i, _fork in enumerate(forks_to_explore):
        value = _count_repos("forks", _fork)
        repo_with_forks[i] = value
        print("number of repos with greater than or equal to %d forks: %d" % (_fork, value))

    # Save as two comma-separated columns (threshold, count) so that the
    # fallback branch below can read the files back with delimiter=",".
    np.savetxt(
        "data/gh_forks.csv",
        np.column_stack([forks_to_explore, repo_with_forks]),
        delimiter=",",
    )
    np.savetxt(
        "data/gh_stars.csv",
        np.column_stack([stars_to_explore, repo_with_stars]),
        delimiter=",",
    )

except ImportError as e:
    print(e)
    print("requests / BeautifulSoup not found. Using data pulled on Feburary 11, 2013")
    # NOTE(review): the original author flagged both loads with "check this";
    # the files are expected to hold threshold in column 0, count in column 1.
    _data = np.genfromtxt("data/gh_forks.csv", delimiter=",")
    forks_to_explore = _data[:, 0]
    repo_with_forks = _data[:, 1]

    _data = np.genfromtxt("data/gh_stars.csv", delimiter=",")
    stars_to_explore = _data[:, 0]
    repo_with_stars = _data[:, 1]
70
71
72
73