In [293]:

Copied!





import numpy as np
from scipy import stats
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns
sns.set()
%matplotlib inline
import numpy as np
from scipy import stats
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns
sns.set()
%matplotlib inline

In [294]:

Copied!

plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['figure.figsize'] = (12, 8)

The Normal distribution¶

In [295]:

Copied!

# Plotting the standard normal distribution
range = np.arange(-3,3,0.001)
plt.plot(range, stats.norm.pdf(range, 0, 1));
# Plotting the standard normal distribution
range = np.arange(-3,3,0.001)
plt.plot(range, stats.norm.pdf(range, 0, 1));

No description has been provided for this image

In [296]:

Copied!





# Here we have both the pdf and cdf and notice that we here use samples
x = np.linspace(-3,3,1000)
mu = 0
sigma = 1
y_pdf = stats.norm.pdf(x,mu,sigma)
y_cdf = stats.norm.cdf(x,mu,sigma)

plt.plot(x, y_pdf, label='pdf')
plt.plot(x, y_cdf, label='cdf')

plt.legend();
# Here we have both the pdf and cdf and notice that we here use samples
x = np.linspace(-3,3,1000)
mu = 0
sigma = 1
y_pdf = stats.norm.pdf(x,mu,sigma)
y_cdf = stats.norm.cdf(x,mu,sigma)

plt.plot(x, y_pdf, label='pdf')
plt.plot(x, y_cdf, label='cdf')

plt.legend();

In [297]:

Copied!





# Let us create a UDF which we can use to plot later on
def plot_normal(x_range, mu=0, sigma=1, cdf=False, **kwargs):
    '''
    Plots the normal distribution function for a given x range
    If mu and sigma are not provided, standard normal is plotted
    If cdf=True cumulative distribution is plotted
    Passes any keyword arguments to matplotlib plot function
    '''
    x = x_range
    if cdf:
        y = stats.norm.cdf(x, mu, sigma)
    else:
        y = stats.norm.pdf(x, mu, sigma)
    plt.plot(x, y, **kwargs)
# Let us create a UDF which we can use to plot later on
def plot_normal(x_range, mu=0, sigma=1, cdf=False, **kwargs):
    '''
    Plots the normal distribution function for a given x range
    If mu and sigma are not provided, standard normal is plotted
    If cdf=True cumulative distribution is plotted
    Passes any keyword arguments to matplotlib plot function
    '''
    x = x_range
    if cdf:
        y = stats.norm.cdf(x, mu, sigma)
    else:
        y = stats.norm.pdf(x, mu, sigma)
    plt.plot(x, y, **kwargs)

In [298]:

Copied!





# As you can see, we get exactly the same as above
x = np.linspace(-3, 3, 1000)

plot_normal(x, label='pdf')
plot_normal(x, cdf=True, label='cdf')
plt.legend();
# As you can see, we get exactly the same as above
x = np.linspace(-3, 3, 1000)

plot_normal(x, label='pdf')
plot_normal(x, cdf=True, label='cdf')
plt.legend();

In [299]:

Copied!





x = 13
mu = 10
sigma = np.sqrt(4)

prob = stats.norm.sf(x,mu,sigma)
print('a) P(X > ' + repr(x) + ')' + ' = ' + repr(round(prob,4)))
n = np.linspace(0, 20, 1000)

plot_normal(n,mu,sigma, label='pdf')
plt.legend();
x = 13
mu = 10
sigma = np.sqrt(4)

prob = stats.norm.sf(x,mu,sigma)
print('a) P(X > ' + repr(x) + ')' + ' = ' + repr(round(prob,4)))
n = np.linspace(0, 20, 1000)

plot_normal(n,mu,sigma, label='pdf')
plt.legend();

a) P(X > 13) = 0.0668

In [300]:

Copied!

x = np.linspace(-3, 3, 1000)

plot_normal(x, label='pdf')
plot_normal(x, cdf=True, label='cdf')
plt.legend();
x = np.linspace(-3, 3, 1000)

plot_normal(x, label='pdf')
plot_normal(x, cdf=True, label='cdf')
plt.legend();

In [302]:

Copied!





# Let us see what happens when we change our parameters
x = np.linspace(-5, 5, 1000)

plot_normal(x, -2, 1, color='red', lw=2, ls='-', alpha=0.5)
plot_normal(x, 2, 1, color='blue', lw=2, ls='-', alpha=0.5)
plot_normal(x, 0, 1, color='green', lw=2, ls='-', alpha=0.5);
# Let us see what happens when we change our parameters
x = np.linspace(-5, 5, 1000)

plot_normal(x, -2, 1, color='red', lw=2, ls='-', alpha=0.5)
plot_normal(x, 2, 1, color='blue', lw=2, ls='-', alpha=0.5)
plot_normal(x, 0, 1, color='green', lw=2, ls='-', alpha=0.5);

In [303]:

Copied!

x = np.linspace(-3, 3, 1000)
plot_normal(x, label='pdf')
plt.legend();
x = np.linspace(-3, 3, 1000)
plot_normal(x, label='pdf')
plt.legend();

In [304]:

Copied!





# Let us create a UDF which will enable us to shade areas under the curve

def draw_z_score(x, cond, mu, sigma, title):
    y = stats.norm.pdf(x, mu, sigma)
    z = x[cond]
    plt.plot(x, y)
    plt.fill_between(z, 0, stats.norm.pdf(z, mu, sigma))
    plt.title(title)
    plt.show()
# Let us create a UDF which will enable us to shade areas under the curve

def draw_z_score(x, cond, mu, sigma, title):
    y = stats.norm.pdf(x, mu, sigma)
    z = x[cond]
    plt.plot(x, y)
    plt.fill_between(z, 0, stats.norm.pdf(z, mu, sigma))
    plt.title(title)
    plt.show()

In [305]:

Copied!

x = np.linspace(-3,3,1000)
z0 = -0.75
draw_z_score(x, x<z0, 0, 1, 'z<-0.75')
x = np.linspace(-3,3,1000)
z0 = -0.75
draw_z_score(x, x

In [306]:

Copied!

z0 = 0.75
draw_z_score(x, (-z0 < x) & (x < z0), 0, 1, '-0.75<z<0.75')
z0 = 0.75
draw_z_score(x, (-z0 < x) & (x < z0), 0, 1, '-0.75

In [307]:

Copied!

z0 = -0.75
z1 = 1.8
draw_z_score(x, (z0 < x) & (x < z1), 0, 1, '-0.75<z<1.8')
z0 = -0.75
z1 = 1.8
draw_z_score(x, (z0 < x) & (x < z1), 0, 1, '-0.75

In [308]:

Copied!

z0 = -1.96
z1 = 1.96
draw_z_score(x, (z0 < x) & (x < z1), 0, 1, '-1.96<z<1.96')
z0 = -1.96
z1 = 1.96
draw_z_score(x, (z0 < x) & (x < z1), 0, 1, '-1.96

Example 4-13/14 page 125-126¶

In [309]:

Copied!





x = 13
mu = 10
sigma = np.sqrt(4)
n = np.linspace(0, 20, 1000)

prob = stats.norm.sf(x,mu,sigma)
print('P(X > ' + repr(x) + ')' + ' = ' + repr(round(prob,4)))


draw_z_score(n, x<=n, mu, sigma, 'X > 13')

x1 = 9
x2 = 11

prob2 = stats.norm.cdf(x2, mu, sigma)-stats.norm.cdf(x1, mu, sigma)
print('P('+ repr(x1) + ' < X < ' + repr(x2) + ')' + 
      ' = ' + repr(round(prob2,4)))

draw_z_score(n, (x1 <= n) & (n <= x2), mu, sigma, '9 < X < 11')
x = 13
mu = 10
sigma = np.sqrt(4)
n = np.linspace(0, 20, 1000)

prob = stats.norm.sf(x,mu,sigma)
print('P(X > ' + repr(x) + ')' + ' = ' + repr(round(prob,4)))


draw_z_score(n, x<=n, mu, sigma, 'X > 13')

x1 = 9
x2 = 11

prob2 = stats.norm.cdf(x2, mu, sigma)-stats.norm.cdf(x1, mu, sigma)
print('P('+ repr(x1) + ' < X < ' + repr(x2) + ')' + 
      ' = ' + repr(round(prob2,4)))

draw_z_score(n, (x1 <= n) & (n <= x2), mu, sigma, '9 < X < 11')

P(X > 13) = 0.0668

P(9 < X < 11) = 0.3829

Normal distribution: histogram and PDF¶

Explore the normal distribution: a histogram built from samples and the PDF (probability density function).

In [310]:

Copied!





# Sample from a normal distribution using numpy's random number generator
samples = np.random.normal(size=10000)

# Compute a histogram of the sample
bins = np.linspace(-3, 3, 30)
histogram, bins = np.histogram(samples, bins=bins, normed=True)
bin_centers = 0.5*(bins[1:] + bins[:-1])

# Compute the PDF on the bin centers from scipy distribution object

pdf = stats.norm.pdf(bin_centers)

plt.plot(bin_centers, histogram, label="Histogram of samples")
plt.plot(bin_centers, pdf, label="PDF")
plt.legend()
plt.show()
# Sample from a normal distribution using numpy's random number generator
samples = np.random.normal(size=10000)

# Compute a histogram of the sample
bins = np.linspace(-3, 3, 30)
histogram, bins = np.histogram(samples, bins=bins, normed=True)
bin_centers = 0.5*(bins[1:] + bins[:-1])

# Compute the PDF on the bin centers from scipy distribution object

pdf = stats.norm.pdf(bin_centers)

plt.plot(bin_centers, histogram, label="Histogram of samples")
plt.plot(bin_centers, pdf, label="PDF")
plt.legend()
plt.show()

In [311]:

Copied!





# Same as above but a different type of histogram. Let's change n.
mu, sigma = 0, 1
s = np.random.normal(mu, sigma,1000)

# Create the bins and histogram
count, bins, ignored = plt.hist(s, 30, density=True)

# Plot the distribution curve
pdf = stats.norm.pdf(bins)
plt.plot(bins, pdf, label="PDF")
plt.show()
# Same as above but a different type of histogram. Let's change n.
mu, sigma = 0, 1
s = np.random.normal(mu, sigma,1000)

# Create the bins and histogram
count, bins, ignored = plt.hist(s, 30, density=True)

# Plot the distribution curve
pdf = stats.norm.pdf(bins)
plt.plot(bins, pdf, label="PDF")
plt.show()

Importing data - in this case from Excel¶

In [312]:

Copied!

# Notice that I do not use headers
df = pd.read_excel('C:/Users/RIB/OneDrive/Documents/Arbejde/Stochastic modeling and processes/Old/Excel/EX06_14.XLS', header=None)
# Notice that I do not use headers
df = pd.read_excel('C:/Users/RIB/OneDrive/Documents/Arbejde/Stochastic modeling and processes/Old/Excel/EX06_14.XLS', header=None)

In [313]:

Copied!

df.head()
df.head()

Out[313]:

	0
0	88.5
1	94.7
2	84.3
3	90.1
4	89.0

In [314]:

Copied!

df.describe()
df.describe()

Out[314]:

	0
count	83.000000
mean	90.533735
std	2.888382
min	83.400000
25%	88.600000
50%	90.400000
75%	92.200000
max	100.300000

In [315]:

Copied!

df.shape
df.shape

Out[315]:

(83, 1)

In [316]:

Copied!

df.ndim
df.ndim

Out[316]:

In [317]:

Copied!

df.dtypes
df.dtypes

Out[317]:

0    float64
dtype: object

In [318]:

Copied!

# To plot this data, we need to sort and then fit the data to a normal
# distribution, basically, we insert the values for mu and std.

x = sorted(df[0])
plot_normal(x, np.mean(x),np.std(x))
# To plot this data, we need to sort and then fit the data to a normal
# distribution, basically, we insert the values for mu and std.

x = sorted(df[0])
plot_normal(x, np.mean(x),np.std(x))

In [319]:

Copied!





# Plot the distribution curve and add a histogram - a few tweeks
pdf = stats.norm.pdf(x, np.mean(x), np.std(x))
plt.plot(x, pdf, '-o', label="PDF")
plt.hist(df[0], density=True, rwidth=0.93)

plt.xlabel('Motor Fuel Octane Ratings')
plt.ylabel('Probability')
plt.legend()
plt.show()
# Plot the distribution curve and add a histogram - a few tweeks
pdf = stats.norm.pdf(x, np.mean(x), np.std(x))
plt.plot(x, pdf, '-o', label="PDF")
plt.hist(df[0], density=True, rwidth=0.93)

plt.xlabel('Motor Fuel Octane Ratings')
plt.ylabel('Probability')
plt.legend()
plt.show()

In [320]:

Copied!





# Plot the distribution curve and histogram
pdf = stats.norm.pdf(sorted(df[0]), np.mean(df[0]), np.std(df[0]))
plt.plot(sorted(df[0]), pdf, '-o', color='g')
plt.hist(df[0], density=True, rwidth=0.9)
plt.show()
# Plot the distribution curve and histogram
pdf = stats.norm.pdf(sorted(df[0]), np.mean(df[0]), np.std(df[0]))
plt.plot(sorted(df[0]), pdf, '-o', color='g')
plt.hist(df[0], density=True, rwidth=0.9)
plt.show()

Normal Probability Plot¶

For many applications, it will be important to make sure that our data is normally distributed, especially if we have small sample sizes. We can do this in many ways, but one of the most commonly used ways is a normal probability plot.

In [321]:

Copied!

stats.probplot(df[0], plot=plt)
plt.ylabel('Motor Fuel Octane Ratings')
plt.show()
stats.probplot(df[0], plot=plt)
plt.ylabel('Motor Fuel Octane Ratings')
plt.show()

Plotting a Kernel Density Estimate (KDE)¶

In this tutorial, you’ve been working with samples, statistically speaking. Whether the data is discrete or continuous, it’s assumed to be derived from a population that has a true, exact distribution described by just a few parameters.

A kernel density estimation (KDE) is a way to estimate the probability density function (PDF) of the random variable that “underlies” our sample. KDE is a means of data smoothing.

In [322]:

Copied!

# Now, above we fitted using a normal distribution, here we kind of
# make a new distribution from the data and plot it.

df.plot.kde();
# Now, above we fitted using a normal distribution, here we kind of
# make a new distribution from the data and plot it.

df.plot.kde();

In [323]:

Copied!





fig, ax = plt.subplots()
df.plot.kde(ax=ax, legend=False, title='Histogram')
df.plot.hist(density=True, ax=ax, legend=False, rwidth=0.9)
ax.set_ylabel('Probability')
ax.grid(axis='y')
ax.set_facecolor('#d8dcd6')
fig, ax = plt.subplots()
df.plot.kde(ax=ax, legend=False, title='Histogram')
df.plot.hist(density=True, ax=ax, legend=False, rwidth=0.9)
ax.set_ylabel('Probability')
ax.grid(axis='y')
ax.set_facecolor('#d8dcd6')

Plotting the cdf and cumulative histogram¶

In [324]:

Copied!





cdf = stats.norm.cdf(sorted(df[0]), np.mean(df[0]), np.std(df[0]))
plt.plot(sorted(df[0]), cdf, '-o', color='g')
plt.hist(df[0], density=True, rwidth=0.9, cumulative = True)
plt.show()
cdf = stats.norm.cdf(sorted(df[0]), np.mean(df[0]), np.std(df[0]))
plt.plot(sorted(df[0]), cdf, '-o', color='g')
plt.hist(df[0], density=True, rwidth=0.9, cumulative = True)
plt.show()

Comparing data¶

In [325]:

Copied!

df2 = pd.read_excel('C:/Users/RIB/OneDrive/Documents/Arbejde/Stochastic modeling and processes/Smoking_and_Cancer.xlsx')
df2 = pd.read_excel('C:/Users/RIB/OneDrive/Documents/Arbejde/Stochastic modeling and processes/Smoking_and_Cancer.xlsx')

In [326]:

Copied!

df2.head()
df2.head()

Out[326]:

	State	# Cigarettes Cigarettes sold	Deaths per 100K bladder cancer	Deaths per 100K lung cancer	Deaths per 100K kidney cancer	Deaths per 100K leukemia
0	AL	18.20	2.90	17.05	1.59	6.15
1	AZ	25.82	3.52	19.80	2.75	6.61
2	AR	18.24	2.99	15.98	2.02	6.94
3	CA	28.60	4.46	22.07	2.66	7.06
4	CT	31.10	5.11	22.83	3.35	7.20

In [327]:

Copied!

df2.describe()
df2.describe()

Out[327]:

	# Cigarettes Cigarettes sold	Deaths per 100K bladder cancer	Deaths per 100K lung cancer	Deaths per 100K kidney cancer	Deaths per 100K leukemia
count	44.000000	44.000000	44.000000	44.000000	44.000000
mean	24.914091	4.121136	19.653182	2.794545	6.829773
std	5.573286	0.964925	4.228122	0.519080	0.638259
min	14.000000	2.860000	12.010000	1.590000	4.900000
25%	21.230000	3.207500	16.437500	2.457500	6.532500
50%	23.765000	4.065000	20.320000	2.845000	6.860000
75%	28.097500	4.782500	22.807500	3.112500	7.207500
max	42.400000	6.540000	27.270000	4.320000	8.280000

In [328]:

Copied!





# Note - the colors do not match, haven't figured out how to do that yet!
fig, ax = plt.subplots()
df2.plot.kde(ax=ax, title='Histogram')
df2.plot.hist(density=True, ax=ax)
ax.set_ylabel('Probability')
ax.grid(axis='x')
ax.set_facecolor('#d8dcd6')
# Note - the colors do not match, haven't figured out how to do that yet!
fig, ax = plt.subplots()
df2.plot.kde(ax=ax, title='Histogram')
df2.plot.hist(density=True, ax=ax)
ax.set_ylabel('Probability')
ax.grid(axis='x')
ax.set_facecolor('#d8dcd6')

Boxplots¶

In [329]:

Copied!





fig, ax = plt.subplots()
ax.set_title('Basic Plot')
ax.boxplot(df[0])
plt.show()
fig, ax = plt.subplots()
ax.set_title('Basic Plot')
ax.boxplot(df[0])
plt.show()

In [330]:

Copied!

df3 = pd.read_excel('C:/Users/RIB/OneDrive/Documents/Arbejde/Stochastic modeling and processes/Old/Excel/EX06_12.XLS', header=None)
df3 = pd.read_excel('C:/Users/RIB/OneDrive/Documents/Arbejde/Stochastic modeling and processes/Old/Excel/EX06_12.XLS', header=None)

In [331]:

Copied!





fig, ax = plt.subplots()
ax.set_title('Basic Plot')
ax.boxplot(df3[0])
plt.show()
fig, ax = plt.subplots()
ax.set_title('Basic Plot')
ax.boxplot(df3[0])
plt.show()

In [332]:

Copied!

df4 = pd.read_excel('C:/Users/RIB/OneDrive/Documents/Arbejde/Stochastic modeling and processes/Old/Excel/EX06_57.XLS')
df4 = pd.read_excel('C:/Users/RIB/OneDrive/Documents/Arbejde/Stochastic modeling and processes/Old/Excel/EX06_57.XLS')

In [333]:

Copied!

df4.head()
df4.head()

Out[333]:

	High Dose	Control
0	16.1	297.1
1	134.9	491.8
2	52.7	1332.9
3	14.4	1172.0
4	124.3	1482.7

In [334]:

Copied!

df4.shape
df4.shape

Out[334]:

(60, 2)

In [335]:

Copied!

df4.isna().sum()
df4.isna().sum()

Out[335]:

High Dose    38
Control       0
dtype: int64

In [336]:

Copied!

 df.isnull().any().any()
 df.isnull().any().any()

Out[336]:

False

In [337]:

Copied!

 df.isnull().any()
 df.isnull().any()

Out[337]:

0    False
dtype: bool

In [338]:

Copied!

df4['High Dose'].dropna()
df4['High Dose'].dropna()

Out[338]:

0      16.1
1     134.9
2      52.7
3      14.4
4     124.3
5      99.0
6      24.3
7      16.3
8      15.2
9      47.7
10     12.9
11     72.7
12    126.7
13     46.4
14     60.3
15     23.5
16     43.6
17     79.4
18     38.0
19     58.2
20     26.5
21     25.1
Name: High Dose, dtype: float64

In [339]:

Copied!





fig, ax = plt.subplots()
ax.set_title('Basic Plot')
ax.boxplot(df4['High Dose'].dropna())
plt.show()
fig, ax = plt.subplots()
ax.set_title('Basic Plot')
ax.boxplot(df4['High Dose'].dropna())
plt.show()

In [340]:

Copied!





fig, ax = plt.subplots()
ax.set_title('Basic Plot')
ax.boxplot(df4['Control'].dropna())
plt.show()
fig, ax = plt.subplots()
ax.set_title('Basic Plot')
ax.boxplot(df4['Control'].dropna())
plt.show()

================================ The Exponential Distribution¶

In [341]:

Copied!





# Like above, we will create a UDF which we can use to plot
def plot_exponential(x_range, mu=0, sigma=1, cdf=False, **kwargs):

    x = x_range
    if cdf:
        y = stats.expon.cdf(x, mu, sigma)
    else:
        y = stats.expon.pdf(x, mu, sigma)
    plt.plot(x, y, **kwargs)
# Like above, we will create a UDF which we can use to plot
def plot_exponential(x_range, mu=0, sigma=1, cdf=False, **kwargs):

    x = x_range
    if cdf:
        y = stats.expon.cdf(x, mu, sigma)
    else:
        y = stats.expon.pdf(x, mu, sigma)
    plt.plot(x, y, **kwargs)

In [ ]:

In [342]:

Copied!





# Like above, we will create a UDF which we can use to plot
def plot_exponential(x_range, cdf=False, **kwargs):

    x = x_range
    if cdf:
        y = stats.expon.cdf(x)
    else:
        y = stats.expon.pdf(x)
    plt.plot(x, y, **kwargs)
# Like above, we will create a UDF which we can use to plot
def plot_exponential(x_range, cdf=False, **kwargs):

    x = x_range
    if cdf:
        y = stats.expon.cdf(x)
    else:
        y = stats.expon.pdf(x)
    plt.plot(x, y, **kwargs)

In [343]:

Copied!

# Plotting with default mu

x = np.linspace(0, 5, 5000)

plot_exponential(x, color='red', lw=2, ls='-', alpha=0.5, label='pdf')
plot_exponential(x, cdf=True, color='blue', lw=2, ls='-', alpha=0.5, label='cdf')
plt.legend();
# Plotting with default mu

x = np.linspace(0, 5, 5000)

plot_exponential(x, color='red', lw=2, ls='-', alpha=0.5, label='pdf')
plot_exponential(x, cdf=True, color='blue', lw=2, ls='-', alpha=0.5, label='cdf')
plt.legend();

In [344]:

Copied!





x = np.linspace(0, 5, 5000)
pdf = stats.expon.pdf(x)
r = stats.expon.rvs(size=5000)
plt.plot(x, pdf)
plt.hist(r, density=True, rwidth=0.9)
plt.show()
x = np.linspace(0, 5, 5000)
pdf = stats.expon.pdf(x)
r = stats.expon.rvs(size=5000)
plt.plot(x, pdf)
plt.hist(r, density=True, rwidth=0.9)
plt.show()

In [345]:

Copied!





# Exercise from slide
prob2 = stats.expon.cdf(x=2,scale=1/0.4)
prob1 = stats.expon.cdf(x=1,scale=1/0.4)
y1 = stats.expon.pdf(np.arange(0,1,0.001), scale=1/0.4)
y2 = stats.expon.pdf(np.arange(1,2,0.001), scale=1/0.4)
y3 = stats.expon.pdf(np.arange(2,10,0.001), scale=1/0.4)
x1 = np.arange(0,1,0.001)
x2 = np.arange(1,2,0.001)
x3 = np.arange(2,10,0.001)
plt.fill_between(x1, y1, facecolor='blue', alpha=0.5)
plt.fill_between(x2, y2, facecolor='red', alpha=0.5)
plt.fill_between(x3, y3, facecolor='blue', alpha=0.5)
plt.text(x=0.3, y=0.2, s= round(prob1,3))
plt.text(x=1.3, y=0.1, s= round(prob2-prob1,3))
plt.text(x=2.5, y=0.05, s= round(1-prob2,3))
plt.show()
# Exercise from slide
prob2 = stats.expon.cdf(x=2,scale=1/0.4)
prob1 = stats.expon.cdf(x=1,scale=1/0.4)
y1 = stats.expon.pdf(np.arange(0,1,0.001), scale=1/0.4)
y2 = stats.expon.pdf(np.arange(1,2,0.001), scale=1/0.4)
y3 = stats.expon.pdf(np.arange(2,10,0.001), scale=1/0.4)
x1 = np.arange(0,1,0.001)
x2 = np.arange(1,2,0.001)
x3 = np.arange(2,10,0.001)
plt.fill_between(x1, y1, facecolor='blue', alpha=0.5)
plt.fill_between(x2, y2, facecolor='red', alpha=0.5)
plt.fill_between(x3, y3, facecolor='blue', alpha=0.5)
plt.text(x=0.3, y=0.2, s= round(prob1,3))
plt.text(x=1.3, y=0.1, s= round(prob2-prob1,3))
plt.text(x=2.5, y=0.05, s= round(1-prob2,3))
plt.show()

In [346]:

Copied!





def draw_exp(x, cond, mu, title):
    y = stats.expon.pdf(x, scale=mu)
    z = x[cond]
    plt.plot(x, y)
    plt.fill_between(z, 0, stats.expon.pdf(z, scale=mu))
    plt.title(title)
    plt.show()
def draw_exp(x, cond, mu, title):
    y = stats.expon.pdf(x, scale=mu)
    z = x[cond]
    plt.plot(x, y)
    plt.fill_between(z, 0, stats.expon.pdf(z, scale=mu))
    plt.title(title)
    plt.show()

In [347]:

Copied!





x = np.arange(0,10,0.001)
x1 = 1
x2 = 2
draw_exp(x, (x1 < x) & (x < x2), 1/0.4, '1 < X < 2')
x = np.arange(0,10,0.001)
x1 = 1
x2 = 2
draw_exp(x, (x1 < x) & (x < x2), 1/0.4, '1 < X < 2')

Ex 5¶

Let $X$ denote the time in hours from the start of the interval until the first log-on.

In [348]:

Copied!





# I could actually import the expon method!!!!

from scipy.stats import expon

mu=1/25
x = 0.1

prob1 = stats.expon.sf(x, 0, mu)
print('P(X > ' + repr(x) + ')' + ' = ' + repr(round(prob1,4)))

n = np.arange(0,0.3,0.01)
draw_exp(n, (x <= n), 1/25, 'X > 6 min')

x1 = 2/60
x2 = 3/60

prob2 = stats.expon.cdf(x2, 0, mu)-stats.expon.cdf(x1, 0, mu)
print('P('+ repr(round(x1,3)) + ' < X < ' + repr(round(x2,3)) + ')' + 
      ' = ' + repr(round(prob2,4)))

draw_exp(n, (x1 <= n) & (n <= x2), 1/25, '2 min < X < 3 min')
# I could actually import the expon method!!!!

from scipy.stats import expon

mu=1/25
x = 0.1

prob1 = stats.expon.sf(x, 0, mu)
print('P(X > ' + repr(x) + ')' + ' = ' + repr(round(prob1,4)))

n = np.arange(0,0.3,0.01)
draw_exp(n, (x <= n), 1/25, 'X > 6 min')

x1 = 2/60
x2 = 3/60

prob2 = stats.expon.cdf(x2, 0, mu)-stats.expon.cdf(x1, 0, mu)
print('P('+ repr(round(x1,3)) + ' < X < ' + repr(round(x2,3)) + ')' + 
      ' = ' + repr(round(prob2,4)))

draw_exp(n, (x1 <= n) & (n <= x2), 1/25, '2 min < X < 3 min')

P(X > 0.1) = 0.0821

P(0.033 < X < 0.05) = 0.1481

In [349]:

Copied!

print('x = ' + repr(round(60*stats.expon.isf(0.9, 0, mu),4)) + ' min')
print('x = ' + repr(round(60*stats.expon.isf(0.9, 0, mu),4)) + ' min')

x = 0.2529 min

Ex 6¶

In [350]:

Copied!





mu = 1.4
x = 0.5

prob1 = stats.expon.cdf(x, 0, mu)
print('P(X < ' + repr(x) + ')' + ' = ' + repr(round(prob1,4)))

n = np.arange(0,5,0.01)
draw_exp(n, (n <= x), 1.4, 'X < 30 sec')
mu = 1.4
x = 0.5

prob1 = stats.expon.cdf(x, 0, mu)
print('P(X < ' + repr(x) + ')' + ' = ' + repr(round(prob1,4)))

n = np.arange(0,5,0.01)
draw_exp(n, (n <= x), 1.4, 'X < 30 sec')

P(X < 0.5) = 0.3003

Ex 7¶

In [351]:

Copied!





mu=1/2
x = 40/60

prob1 = stats.expon.cdf(x, 0, mu)
print('P(X < ' + repr(round(x,3)) + ')' + ' = ' + repr(round(prob1,4)))
n = np.arange(0,5,0.01)
draw_exp(n, (n <= x), 1/2, 'X < 40 sec')
mu=1/2
x = 40/60

prob1 = stats.expon.cdf(x, 0, mu)
print('P(X < ' + repr(round(x,3)) + ')' + ' = ' + repr(round(prob1,4)))
n = np.arange(0,5,0.01)
draw_exp(n, (n <= x), 1/2, 'X < 40 sec')

P(X < 0.667) = 0.7364

In [352]:

Copied!

print('x = ' + repr(round(stats.expon.ppf(0.9, 0, mu),4)) + ' min')
print('x = ' + repr(round(stats.expon.ppf(0.9, 0, mu),4)) + ' min')

x = 1.1513 min