CoCalc -- 2015-04-13-134419.ipynb

Jupyter notebook 2015-04-13-134419.ipynb

2015-04-13-134419.ipynb

⁶¹ views

Kernel: Python 2

In [2]:

from datetime import datetime
from datetime import date
a = datetime.strptime('2012-02-10', "%Y-%m-%d")
#atetime.date(2012, 2, 10)
#print x
#print date.isoformat(y)
print a.isoformat()
#'2002-03-11'


#a = datetime.strptime(x, "%Y-%m-%d")
#print a

Out[2]:

2012-02-10T00:00:00

In [3]:

L=[.5, .4, .2, .9]
searchplace1 = L.index(.4)
searchplace2 = L.index(.9)+1
for i in L[searchplace1:searchplace2]:
    print i

Out[3]:

4
2
9

In [4]:

from datetime import datetime as dt
date1 = '2013-10-11'
a1=dt.strptime(date1, "%Y-%m-%d").date()
date2 = '2013-10-12'
a2=dt.strptime(date2, "%Y-%m-%d").date()
date3 = '2013-10-13'
a3=dt.strptime(date3, "%Y-%m-%d").date()

datelist= [a1, a2, a3]
#print datelist
testvar = '2013-10-12'
testdate= dt.strptime(testvar, "%Y-%m-%d").date()
a = datelist.index(testdate)
b = datelist.index(testdate)+2
#a.isoformat()
for i in datelist[a:b]:
    print i

Out[4]:

2013-10-12
2013-10-13

In [5]:

import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
# Example data
people = ('Tosssm', 'Dick', 'Harry', 'Slim', 'Jim')
y_pos = np.arange(len(people))
performance = 3 + 10 * np.random.rand(len(people))
#error = np.random.rand(len(people))
#xerr=error,
plt.barh(y_pos, performance,  align='center', alpha=0.4)
plt.yticks(y_pos, people)
plt.xlabel('Performance')
plt.title('Cumu_ARPU?')

plt.show()

Out[5]:

In [6]:

#!/usr/bin/env python
# a bar plot with errorbars
import numpy as np
import matplotlib.pyplot as plt

N = 5
menMeans = (20, 35, 30, 35, 27)
menStd =   (2, 3, 4, 1, 2)

ind = np.arange(N)  # the x locations for the groups
width = 0.35       # the width of the bars

fig, ax = plt.subplots()
rects1 = ax.bar(ind, menMeans, width, color='r', yerr=menStd)

womenMeans = (25, 32, 34, 20, 25)
womenStd =   (3, 5, 2, 3, 3)
rects2 = ax.bar(ind+width, womenMeans, width, color='y', yerr=womenStd)

# add some text for labels, title and axes ticks
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(ind+width)
ax.set_xticklabels( ('G1', 'G2', 'G3', 'G4', 'G5') )

ax.legend( (rects1[0], rects2[0]), ('Men', 'Women') )

def autolabel(rects):
    # attach some text labels
    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x()+rect.get_width()/2., 1.05*height, '%d'%int(height),
                ha='center', va='bottom')

autolabel(rects1)
autolabel(rects2)

plt.show()

Out[6]:

In [7]:

import pandas
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

CPI=[.5, .4]
cumu_arpu=[.6, .1]
allvalues=CPI+cumu_arpu

a = dict(graph=['Item one', 'Item two', 'Item three', 'ok'],
                           CPI = CPI, cumu_arpu=cumu_arpu)
#I'm going to have a list with the dates, that needs to be the first item in the dict, next is 
#df = pandas.DataFrame(a['graph'], a['CPI'], a['cumu_arpu']) 

df = pandas.DataFrame(dict(graph=['2015-03-01', '2015-03-02'],
                           CPI=CPI, cumu_arpu=cumu_arpu))

#print a
print df

ind = np.arange(len(df))
width = 0.4

#print ind

fig, ax = plt.subplots()
ax.barh(ind+width, df.cumu_arpu, width, color= 'LightSteelBlue', label= 'cumu_ARPU') #color='red',
ax.barh(ind, df.CPI, width, color='LightSalmon', label='CPI') #+ width

ax.set(yticks=ind + width, yticklabels=df.graph, ylim=[2*width - 1, len(df)], xlim= [0,max(allvalues)*1.7])
#ax.set(yticks=ind + width, yticklabels=df.graph, ylim=[2*width - 1, len(df)*2], xlim= [0,7])

fig.suptitle('Twitter: 2015-03-01 vs 2015-03-02')

ax.legend()

plt.show()

Out[7]:

   CPI  cumu_arpu       graph
0  0.5        0.6  2015-03-01
1  0.4        0.1  2015-03-02

In [8]:

import pandas
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

CPI=[.5, .2]
cumu_arpu=[.6, .4]
allvalues=CPI+cumu_arpu

a = dict(graph=['cumuarpu', 'cpi'],CPI = CPI, cumu_arpu=cumu_arpu)

#I'm going to have a list with the dates, that needs to be the first item in the dict, next is 
#df = pandas.DataFrame(a['graph'], a['CPI'], a['cumu_arpu']) 

df = pandas.DataFrame(dict(graph=['cumuarpu', 'cpi'],
                           CPI=CPI, cumu_arpu=cumu_arpu))

print a
print df

ind = np.arange(len(df))
width = 0.4

print ind

fig, ax = plt.subplots()
ax.barh(ind+width, df.cumu_arpu, width, color= 'DeepSkyBlue', label= 'Twitter') #color='red',
ax.barh(ind, df.CPI, width, color='Navy', label='Facebook') #+ width

ax.set(yticks=ind + width, yticklabels=df.graph, ylim=[2*width - 1, len(df)], xlim= [0,max(allvalues)*1.7])
#ax.set(yticks=ind + width, yticklabels=df.graph, ylim=[2*width - 1, len(df)*2], xlim= [0,7])


ax.legend()

plt.show()

Out[8]:

{'CPI': [0.5, 0.2], 'cumu_arpu': [0.6, 0.4], 'graph': ['cumuarpu', 'cpi']}
   CPI  cumu_arpu     graph
0  0.5        0.6  cumuarpu
1  0.2        0.4       cpi
[0 1]

In [9]:

import pandas
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

CPI=[.5, .2]
cumu_arpu=[.6, .4]
allvalues=CPI+cumu_arpu

a = dict(graph=['cumuarpu', 'cpi'],CPI = CPI, cumu_arpu=cumu_arpu)

#I'm going to have a list with the dates, that needs to be the first item in the dict, next is 
#df = pandas.DataFrame(a['graph'], a['CPI'], a['cumu_arpu']) 

df = pandas.DataFrame(dict(graph=['cumuarpu', 'cpi'],
                           CPI=CPI, cumu_arpu=cumu_arpu))

print a
print df

ind = np.arange(len(df))
width = 0.4

print ind

#fig, ax = plt.subplots()
#ax.barh(ind+width, df.cumu_arpu, width, color= 'DeepSkyBlue', label= 'Twitter') #color='red',
#ax.barh(ind, df.CPI, width, color='Navy', label='Facebook') #+ width

#ax.set(yticks=ind + width, yticklabels=df.graph, ylim=[2*width - 1, len(df)], xlim= [0,max(allvalues)*1.7])
#ax.set(yticks=ind + width, yticklabels=df.graph, ylim=[2*width - 1, len(df)*2], xlim= [0,7])



#ax.legend()

#plt.show()

Out[9]:

{'CPI': [0.5, 0.2], 'cumu_arpu': [0.6, 0.4], 'graph': ['cumuarpu', 'cpi']}
   CPI  cumu_arpu     graph
0  0.5        0.6  cumuarpu
1  0.2        0.4       cpi
[0 1]

In [10]:

a = dict(graph=['Item one', 'Item two', 'Item three', 'ok'],CPI = 'CPI', cumu_arpu='cumu_arpu')

print a

#df = pandas.DataFrame(a['graph'], a['CPI'], a['cumu_arpu'])

Out[10]:

{'CPI': 'CPI', 'cumu_arpu': 'cumu_arpu', 'graph': ['Item one', 'Item two', 'Item three', 'ok']}

In [11]:

import pandas
import matplotlib.pyplot as plt
import numpy as np
df = pandas.DataFrame(np.randn(8, 3), index=index,columns=['A', 'B', 'C'])

Out[11]:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-11-f0b83f5dfb6b> in <module>()
      2 import matplotlib.pyplot as plt
      3 import numpy as np
----> 4 df = pandas.DataFrame(np.randn(8, 3), index=index,columns=['A', 'B', 'C'])
      5 
AttributeError: 'module' object has no attribute 'randn'

In [12]:

x = [50, 50, 50]
y = 100
def breakeven(cost, revenuestreamlistvar): 
        n = 0
        totalrevenue = revenuestreamlistvar[n]
        while (y > totalrevenue) and (n < len(revenuestreamlistvar)-1):
                n = n + 1
                totalrevenue = totalrevenue + revenuestreamlistvar[n]
        if y > totalrevenue:
            n = "did not break even"
            print "no"
        #return n
        print 'the number of days to break even is %i' % n 

print x[0]
breakeven(y, x)

Out[12]:

50
the number of days to break even is 1

In [13]:

x = [50, 50, 50]
n = 0
y = 100
totalrevenue = x[n]
while (y > totalrevenue) and (n < len(x)-1):
                n = n + 1
                totalrevenue = totalrevenue + x[n]
if y > totalrevenue:
    n = "did not break even"
    print "no"
#return n
print 'the number of days to break even is %i' % n

Out[13]:

the number of days to break even is 1

In [14]:


CPI=[.5, .4, .2, .9]
cumu_arpu=[.6, .1, .3,.2]
allvalues=CPI+cumu_arpu

a = dict(graph=['Item one', 'Item two', 'Item three', 'ok'],
                           CPI = CPI, cumu_arpu=cumu_arpu)

print a['CPI']
a['CPI'].append(1)
print a['CPI']

Out[14]:

[0.5, 0.4, 0.2, 0.9]
[0.5, 0.4, 0.2, 0.9, 1]

In [ ]:

In [15]:

def basic_linear_regression(x, y):
    # Basic computations to save a little time.
    length = len(x)
    sum_x = sum(x)
    sum_y = sum(y)

    # Σx^2, and Σxy respectively.
    sum_x_squared = sum(map(lambda a: a * a, x))
    sum_of_products = sum([x[i] * y[i] for i in range(length)])

    # Magic formulae!  
    a = (sum_of_products - (sum_x * sum_y) / length) / (sum_x_squared - ((sum_x ** 2) / length))
    b = (sum_y - a * sum_x) / length
    print a, b
basic_linear_regression([1,2,3,4,5], [140,120, 100, 70, 60, 60])

#http://jmduke.com/posts/basic-linear-regressions-in-python/

Out[15]:

-39 227

In [16]:

from math import log
from scipy import stats
import numpy as np
x = list(range(1, 14))
#print x
#print math.exp(-x)
y = [100,95,90,85,80,75,70,65,60,55,50,48,46]
z = map(log, y)
#print z

#z = math.log10(y)
a = slope, intercept, r_value, p_value, std_err = stats.linregress(x,z)
print a
"""
for i in range(20):
    e = a[0]*i+a[1]
    print i, 
    print math.exp(e)
"""
#print map(log, b[0])
#print math.exp(-b)
"""
def breakeven(cost, revenuestreamlistvar):
    n = 0
    totalrevenue = revenuestreamlistvar[n]
    while (cost > totalrevenue) and (n < len(revenuestreamlistvar)-1):
        n = n + 1
        totalrevenue = totalrevenue + y[n]
    if cost > totalrevenue:
        while cost > totalrevenue:
            from math import log
            from scipy import stats
            import numpy as np
            xa = list(range(1, len(revenuestreamlistvar)+1))
            ya = revenuestreamlistvar
            za = map(log, y)
            aa = slope, intercept, r_value, p_value, std_err = stats.linregress(x,z)
            print a
            cost = cost*2
        n = "did not break even"
    print 'the breakeven point is %i days' % n 
    return n

#breakeven(1000, y)
"""

Out[16]:

(-0.068396252760348761, 4.7047313404642956, -0.99697847091833169, 1.4648709256120108e-13, 0.0016067572441345482)

'\ndef breakeven(cost, revenuestreamlistvar):\n    n = 0\n    totalrevenue = revenuestreamlistvar[n]\n    while (cost > totalrevenue) and (n < len(revenuestreamlistvar)-1):\n        n = n + 1\n        totalrevenue = totalrevenue + y[n]\n    if cost > totalrevenue:\n        while cost > totalrevenue:\n            from math import log\n            from scipy import stats\n            import numpy as np\n            xa = list(range(1, len(revenuestreamlistvar)+1))\n            ya = revenuestreamlistvar\n            za = map(log, y)\n            aa = slope, intercept, r_value, p_value, std_err = stats.linregress(x,z)\n            print a\n            cost = cost*2\n        n = "did not break even"\n    print \'the breakeven point is %i days\' % n \n    return n\n\n#breakeven(1000, y)\n'

In [17]:

f1 = [600 ,550, 500, 450, 400, 350, 300, 250, 200 ,150, 100, 90, 80, 70, 60, 50]
f2 = [800, 700, 600, 500, 400, 300, 250, 200, 180, 160, 140, 120, 100, 80, 60]
f3 = [900, 800, 700, 600, 500, 450, 350, 250, 200, 150, 100, 90, 80, 70]
f4 = [1200, 1100, 1000, 900, 800, 700, 600, 500, 400, 300, 200, 180, 160]

t1 = [100, 95, 90, 85, 80, 75, 70, 65, 60, 55, 50, 48, 46, 44, 42, 40]
t2 =[ 500, 500, 500, 500, 300, 280, 200, 180, 160, 140, 120, 100, 70, 60, 60]
t3 =[100, 95, 90, 85, 80, 75, 70, 65, 60, 55, 50 ,48 ,46, 44]
t4 =[100 ,95, 90, 85, 80, 75, 70, 65, 60, 55 ,50 ,48, 46] #13 values

allstream = [f1, t1,f2,t2,f3,t3,f4,t4]

In [18]:

from math import log
from scipy import stats
import numpy as np
f = [600 ,550, 500, 450, 400, 350, 300, 250, 200 ,150, 100, 90, 80, 70, 60, 50]
f2 = [800, 700, 600, 500, 400, 300, 250, 200, 180, 160, 140, 120, 100, 80, 60]
f3 = [900, 800, 700, 600, 500, 450, 350, 250, 200, 150, 100, 90, 80, 70]
f4 = [1200, 1100, 1000, 900, 800, 700, 600, 500, 400, 300, 200, 180, 160]

t1 = [100, 95, 90, 85, 80, 75, 70, 65, 60, 55, 50, 48, 46, 44, 42, 40]
t2 =[ 500, 500, 500, 500, 300, 280, 200, 180, 160, 140, 120, 100, 70, 60, 60]
t3 =[100, 95, 90, 85, 80, 75, 70, 65, 60, 55, 50 ,48 ,46, 44]
t4 =[100 ,95, 90, 85, 80, 75, 70, 65, 60, 55 ,50 ,48, 46]

x = list(range(1, len(f)-1)
#print x
#print math.exp(-x)
y = [600,550,500,450,400,350,300,250,200,150,100,90,80,70,60,50]
z = map(log, f)
#print z

#z = math.log10(y)
a = slope, intercept, r_value, p_value, std_err = stats.linregress(x,z)
r = a[2]
print  r**2

Out[18]:

  File "<ipython-input-18-541e57fd0486>", line 17
    y = [600,550,500,450,400,350,300,250,200,150,100,90,80,70,60,50]
    ^
SyntaxError: invalid syntax

In [21]:

from math import log
from scipy import stats
import numpy as np
x = list(range(1, len(f2)-1))
#print x
#print math.exp(-x)
y = f2
z = map(log, y)
#print z

#z = math.log10(y)

print x
print z
#a = slope, intercept, r_value, p_value, std_err = stats.linregress(x,z)
#print a

Out[21]:

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
[6.684611727667927, 6.551080335043404, 6.396929655216146, 6.214608098422191, 5.991464547107982, 5.703782474656201, 5.521460917862246, 5.298317366548036, 5.19295685089021, 5.075173815233827, 4.941642422609304, 4.787491742782046, 4.605170185988092, 4.382026634673881, 4.0943445622221]

In [20]:

from math import log
from scipy import stats
import numpy as np

def regresspath(variableto):
    y = variableto  #[100,95,90,85,80,75,70,65,60,55,50,48,46] 
    x = list(range(1, len(y)+1))
    #print x #print math.exp(-x) 
    z = map(log, y) 
    #print z 
    #z = math.log10(y)
    a = slope, intercept, r_value, p_value, std_err = stats.linregress(x,z)
    r = a[2]
    print dec(r**2)

for i in allstream:
    regresspath(i)

Out[20]:

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-20-ccc331ad703d> in <module>()
     15 
     16 for i in allstream:
---> 17     regresspath(i)

<ipython-input-20-ccc331ad703d> in regresspath(variableto)
     12     a = slope, intercept, r_value, p_value, std_err = stats.linregress(x,z)
     13     r = a[2]
---> 14     print dec(r**2)
     15 
     16 for i in allstream:
NameError: global name 'dec' is not defined

In [26]:

from math import log
from scipy import stats
import numpy as np
test1 = [2,10, 20, 50, 60]
test = map(log, test1)
print test

Out[26]:

[0.6931471805599453, 2.302585092994046, 2.995732273553991, 3.912023005428146, 4.0943445622221]

In [ ]:

Product

Resources

Company