Notebook for demonstrating different code styles and soliciting preferences.
Imagine a scenario where you need to generate samples from a normal distribution for two different values of $\mu$ and $\sigma$ and plot each one separately. You have two options:
Write code to generate and plot the sample and copy and paste it for each of the parameter values
Write a function to generate the sample and call it for each of the parameter values
When we are working on a project in the real world, similar situations often occur. You may start out needing a chunk of code only once; then a new analysis is needed, and you copy and paste the code to adapt it to the new analysis. Instead, using modular functions can help increase code usability and decrease code size.
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd

# Scenario 1: normal distribution with mu=0, sigma=1, sampled n=1000 times.
mu, sigma, n = 0, 1, 1000

# Draw n values from the normal distribution described by mu and sigma.
samples = np.random.normal(mu, sigma, n)

# Density-normalised histogram of the draws.
plt.hist(samples, bins=30, density=True, alpha=0.6, color="g")
plt.title("Histogram of Samples from Normal Distribution")
plt.xlabel("Value")
plt.ylabel("Probability Density")
plt.grid(True)
plt.show()

# Tabulate the requested parameters next to their sample estimates.
df = pd.DataFrame(
    {
        "True": [mu, sigma],
        "Sample": [samples.mean(), samples.std()],
    },
    index=["Mu", "Sigma"],
)
display(df.round(3))
# Scenario 2: normal distribution with mu=1, sigma=4, sampled n=500 times.
mu = 1
sigma = 4
n = 500

# Generate random samples from a normal distribution
samples = np.random.normal(loc=mu, scale=sigma, size=n)

# Visualize the distribution of the samples using a histogram
plt.hist(samples, bins=30, density=True, alpha=0.6, color='g')
plt.title('Histogram of Samples from Normal Distribution')
plt.xlabel('Value')
plt.ylabel('Probability Density')
plt.grid(True)
plt.show()

# Display the requested mu and sigma next to the values estimated from the
# samples (np.std uses its default ddof=0 here).
df = pd.DataFrame(
    {"True": [mu, sigma], "Sample": [np.mean(samples), np.std(samples)]},
    index=["Mu", "Sigma"],
)
display(df.round(3))

# Code 2
def sample_normal_dist(mu, sigma, n, plot=True):
    """
    Draw ``n`` samples from a normal distribution and summarise them.

    A table comparing the requested mean and standard deviation with the
    values estimated from the samples is always shown; the histogram is
    drawn only when ``plot`` is true.

    Args:
        mu (float): mean of the distribution.
        sigma (float): standard deviation of the distribution.
        n (int): number of samples to draw.
        plot (bool): whether to show a histogram of the samples.

    Returns:
        np.ndarray: the generated samples.
    """
    # Generate random samples from a normal distribution
    samples = np.random.normal(loc=mu, scale=sigma, size=n)

    if plot:
        # Visualize the distribution of the samples using a histogram
        plt.hist(samples, bins=30, density=True, alpha=0.6, color='g')
        plt.title('Histogram of Samples from Normal Distribution')
        plt.xlabel('Value')
        plt.ylabel('Probability Density')
        plt.grid(True)
        plt.show()

    # Display the mu and sigma (np.std uses its default ddof=0 here)
    df = pd.DataFrame(
        {"True": [mu, sigma], "Sample": [np.mean(samples), np.std(samples)]},
        index=["Mu", "Sigma"],
    )

    # `display` is only injected by IPython/Jupyter; fall back to print so the
    # function also works when run as plain Python.
    try:
        show = display  # noqa: F821
    except NameError:
        show = print
    show(df.round(3))

    return samples


mu = 0
sigma = 1
n = 1000
sample_normal_dist(mu, sigma, n)

# Second parameter set: mu=1, sigma=4, n=500.
mu = 1
sigma = 4
n = 500
sample_normal_dist(mu, sigma, n)


def sample_normal_dist(
    mu: float, sigma: float, n: int, plot: bool = True
) -> np.ndarray:
    """
    Samples from a normal distribution with given parameters with
    the option to plot

    A table comparing the requested mean and standard deviation with the
    values estimated from the samples is always shown.

    Args:
        mu (float): mean.
        sigma (float): standard deviation.
        n (int): number of samples.
        plot (bool): whether to show a histogram of the samples.

    Returns:
        np.ndarray: array of samples.
    """
    # Generate random samples from a normal distribution
    samples = np.random.normal(loc=mu, scale=sigma, size=n)

    if plot:
        # Visualize the distribution of the samples using a histogram
        plt.hist(samples, bins=30, density=True, alpha=0.6, color='g')
        plt.title('Histogram of Samples from Normal Distribution')
        plt.xlabel('Value')
        plt.ylabel('Probability Density')
        plt.grid(True)
        plt.show()

    # Display the mu and sigma (np.std uses its default ddof=0 here)
    df = pd.DataFrame(
        {"True": [mu, sigma], "Sample": [np.mean(samples), np.std(samples)]},
        index=["Mu", "Sigma"],
    )

    # `display` is only injected by IPython/Jupyter; fall back to print so the
    # function also works when run as plain Python.
    try:
        show = display  # noqa: F821
    except NameError:
        show = print
    show(df.round(3))

    return samples


# Code 2
def sample_normal_dist(mu, sigma, n, plot=True):
    """
    Draw ``n`` samples from a normal distribution, optionally plot them,
    and show a table of requested versus estimated parameters.

    Args:
        mu (float): mean of the distribution.
        sigma (float): standard deviation of the distribution.
        n (int): number of samples to draw.
        plot (bool): whether to show a histogram of the samples.

    Returns:
        np.ndarray: array of samples.
    """
    # Generate random samples from a normal distribution
    samples = np.random.normal(loc=mu, scale=sigma, size=n)

    if plot:
        # Visualize the distribution of the samples using a histogram
        plt.hist(samples, bins=30, density=True, alpha=0.6, color='g')
        plt.title('Histogram of Samples from Normal Distribution')
        plt.xlabel('Value')
        plt.ylabel('Probability Density')
        plt.grid(True)
        plt.show()

    # Display the mu and sigma (np.std uses its default ddof=0 here)
    df = pd.DataFrame(
        {"True": [mu, sigma], "Sample": [np.mean(samples), np.std(samples)]},
        index=["Mu", "Sigma"],
    )

    # `display` is only injected by IPython/Jupyter; fall back to print so the
    # function also works when run as plain Python.
    try:
        show = display  # noqa: F821
    except NameError:
        show = print
    show(df.round(3))

    return samples