import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
import seaborn as sns
from sklearn.datasets import load_boston
from sklearn.datasets import load_iris
from sklearn.datasets import load_wine
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import svm
from sklearn import tree
from sklearn.model_selection import GridSearchCV
from IPython.display import Image
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
import seaborn as sns
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.graphics.regressionplots import abline_plot

from patsy import dmatrices, dmatrix, demo_data, ContrastMatrix, Poly


# Load the raw penguin measurements from the CSV file in the working directory.
df = pd.read_csv("penguins_size.csv")
df.head()


# Rename the columns to more intuitive names.
df = df.rename(columns={
    "species": "Species",
    "island": "Island",
    "culmen_length_mm": "Bill length (mm)",
    "culmen_depth_mm": "Bill depth (mm)",
    "flipper_length_mm": "Flipper length (mm)",
    "body_mass_g": "Body mass (grams)",
    "sex": "Sex",
})

# Drop every ROW that has a missing value in any column
# (dropna removes rows by default, not columns).
df = df.dropna()

# Keep only rows whose sex is exactly "MALE" or "FEMALE". An exact match is
# required here: "FEMALE" contains "MALE" as a substring, so a substring test
# would accept any value that merely embeds "MALE". The explicit copy makes
# the filtered frame independent, so the column assignment below is safe.
df = df[df["Sex"].isin(["MALE", "FEMALE"])].copy()

# Encode sex as 0 for male and 1 for female. The mapped column is assigned
# back to df; an in-place replace on the df["Sex"] selection is not guaranteed
# to write through to df itself.
df["Sex"] = df["Sex"].map({"FEMALE": 1, "MALE": 0})

# Drop the island column since it is not used in the analysis.
df = df.drop(columns=["Island"])

# Renumber the rows from zero after the filtering above.
df = df.reset_index(drop=True)

# Ensure bill length/depth, flipper length, and body mass are floats. Casting
# whole columns sets the column dtype and avoids a Python-level loop over rows.
df = df.astype({
    "Bill length (mm)": float,
    "Bill depth (mm)": float,
    "Flipper length (mm)": float,
    "Body mass (grams)": float,
})


df.head()


# Isolate the Adelie penguins and renumber their rows from zero.
adelie_data = df[df["Species"].str.contains("Adelie")].reset_index(drop=True)

# Split the Adelie rows by sex (the "Sex" column holds 0 for male, 1 for female).
male_data_adelie = adelie_data[adelie_data["Sex"] == 0]
female_data_adelie = adelie_data[adelie_data["Sex"] == 1]

# One histogram panel per measurement: (data column, x-axis label, title).
adelie_panels = [
    ("Bill length (mm)", "Bill Length (mm)",
     "Histogram for Adelie Penguin Bill Length (mm) by gender"),
    ("Bill depth (mm)", "Bill Depth (mm)",
     "Histogram for Adelie Penguin Bill Depth (mm) by gender"),
    ("Flipper length (mm)", "Flipper length (mm)",
     "Histogram for Adelie Penguin Flipper Length (mm) by gender"),
    ("Body mass (grams)", "Body mass (grams)",
     "Histogram for Adelie Penguin Body Mass (grams) by gender"),
]

# Draw the male/female histograms side by side in four stacked panels.
fig, ax = plt.subplots(4, 1, figsize=(15, 20))
for panel_ax, (column, x_label, panel_title) in zip(ax, adelie_panels):
    panel_ax.hist(
        [male_data_adelie[column], female_data_adelie[column]],
        color=["blue", "pink"],
        label=["Male", "Female"],
    )
    panel_ax.set_xlabel(x_label)
    panel_ax.set_ylabel("Frequency")
    panel_ax.set_title(panel_title)
    # Without a legend the reader has to guess which color is which sex.
    panel_ax.legend()

fig.savefig("adelie_gender_diff.png")


# Isolate the Chinstrap penguins and renumber their rows from zero.
chin_data = df[df["Species"].str.contains("Chinstrap")].reset_index(drop=True)

# Split the Chinstrap rows by sex (the "Sex" column holds 0 for male, 1 for female).
male_data_chin = chin_data[chin_data["Sex"] == 0]
female_data_chin = chin_data[chin_data["Sex"] == 1]

# One histogram panel per measurement: (data column, x-axis label, title).
chin_panels = [
    ("Bill length (mm)", "Bill Length (mm)",
     "Histogram for Chinstrap Penguin bill length (mm) by gender"),
    ("Bill depth (mm)", "Bill Depth (mm)",
     "Histogram for Chinstrap Penguin bill depth (mm) by gender"),
    ("Flipper length (mm)", "Flipper length (mm)",
     "Histogram for Chinstrap Penguin flipper length (mm) by gender"),
    ("Body mass (grams)", "Body mass (grams)",
     "Histogram for Chinstrap Penguin body mass (grams) by gender"),
]

# Draw the male/female histograms side by side in four stacked panels.
fig, ax = plt.subplots(4, 1, figsize=(15, 20))
for panel_ax, (column, x_label, panel_title) in zip(ax, chin_panels):
    panel_ax.hist(
        [male_data_chin[column], female_data_chin[column]],
        color=["blue", "pink"],
        label=["Male", "Female"],
    )
    panel_ax.set_xlabel(x_label)
    panel_ax.set_ylabel("Frequency")
    panel_ax.set_title(panel_title)
    # Without a legend the reader has to guess which color is which sex.
    panel_ax.legend()

fig.savefig("chin_gender_diff.png")


# Isolate the Gentoo penguins and renumber their rows from zero.
gent_data = df[df["Species"].str.contains("Gentoo")].reset_index(drop=True)

# Split the Gentoo rows by sex (the "Sex" column holds 0 for male, 1 for female).
male_data_gent = gent_data[gent_data["Sex"] == 0]
female_data_gent = gent_data[gent_data["Sex"] == 1]

# One histogram panel per measurement: (data column, x-axis label, title).
gent_panels = [
    ("Bill length (mm)", "Bill Length (mm)",
     "Histogram for Gentoo Penguin bill length (mm) by gender"),
    ("Bill depth (mm)", "Bill Depth (mm)",
     "Histogram for Gentoo Penguin bill depth (mm) by gender"),
    ("Flipper length (mm)", "Flipper length (mm)",
     "Histogram for Gentoo Penguin flipper length (mm) by gender"),
    ("Body mass (grams)", "Body mass (grams)",
     "Histogram for Gentoo Penguin body mass (grams) by gender"),
]

# Draw the male/female histograms side by side in four stacked panels.
fig, ax = plt.subplots(4, 1, figsize=(15, 20))
for panel_ax, (column, x_label, panel_title) in zip(ax, gent_panels):
    panel_ax.hist(
        [male_data_gent[column], female_data_gent[column]],
        color=["blue", "pink"],
        label=["Male", "Female"],
    )
    panel_ax.set_xlabel(x_label)
    panel_ax.set_ylabel("Frequency")
    panel_ax.set_title(panel_title)
    # Without a legend the reader has to guess which color is which sex.
    panel_ax.legend()

fig.savefig("gent_gender_diff.png")


# Violin plots of each measurement (y-axis) grouped by species (x-axis),
# laid out on a 2x2 grid.
fig1, ax1 = plt.subplots(2, 2, figsize=(15, 15))

# (measurement column, panel title), listed in row-major panel order so they
# pair up with ax1.flat: top-left, top-right, bottom-left, bottom-right.
violin_panels = [
    ("Bill length (mm)", 'Violin Plot of Bill Lengths (mm) by Penguin Species'),
    ("Bill depth (mm)", 'Violin Plot of Bill Depths (mm) by Penguin Species'),
    ("Flipper length (mm)", 'Violin Plot of Flipper Lengths (mm) by Penguin Species'),
    ("Body mass (grams)", 'Violin Plot of Body Mass (grams) by Penguin Species'),
]

for violin_ax, (measurement, violin_title) in zip(ax1.flat, violin_panels):
    drawn_ax = sns.violinplot(ax=violin_ax, x=df["Species"], y=df[measurement])
    drawn_ax.set_title(violin_title)

fig1.savefig("bill_lengths_violin_plot.png")


# Heatmap of the pairwise correlations between the numeric variables.
# Only numeric columns are correlated: "Species" holds strings, and
# DataFrame.corr() raises on non-numeric columns in pandas >= 2.0.
# The heatmap gets its own figure/axes; without an explicit `ax` seaborn draws
# into whichever axes is current, which outside a notebook may be a panel of a
# previously created figure.
heatmap_fig, heatmap_ax = plt.subplots()
sns.heatmap(df.select_dtypes(include="number").corr(), annot=True, ax=heatmap_ax)
plt.show()


# Pairwise scatter plots of the measurements for all penguins, colored by
# species, on a 2x2 grid (the unused bottom-right panel is removed).
fig, ax = plt.subplots(2, 2, figsize=(15, 15))

# (legend label, per-species frame, marker color)
species_series = [
    ("Adelie", adelie_data, "blue"),
    ("Chinstrap", chin_data, "green"),
    ("Gentoo", gent_data, "red"),
]

# (axes, x column, y column, x-axis label, title); the y-axis label is the
# y column name itself.
scatter_panels = [
    # Body mass vs. flipper length
    (ax[0, 0], "Body mass (grams)", "Flipper length (mm)", "Body Mass (grams)",
     "Body Mass vs. Flipper Length for All Penguins"),
    # Bill length vs. flipper length
    (ax[1, 0], "Bill length (mm)", "Flipper length (mm)", "Bill length (mm)",
     "Bill Length vs. Flipper Length for All Penguins"),
    # Body mass vs. bill length
    (ax[0, 1], "Body mass (grams)", "Bill length (mm)", "Body mass (grams)",
     "Body Mass vs. Bill Length for All Penguins"),
]

for panel_ax, x_column, y_column, x_label, panel_title in scatter_panels:
    for species_name, species_frame, species_color in species_series:
        panel_ax.scatter(
            species_frame[x_column],
            species_frame[y_column],
            color=species_color,
            label=species_name,
        )
    panel_ax.set_xlabel(x_label)
    panel_ax.set_ylabel(y_column)
    panel_ax.set_title(panel_title)
    # The colors encode species; a legend is needed to read the plot.
    panel_ax.legend(title="Species")

fig.delaxes(ax[1, 1])
fig.savefig("bm_vs_bl.png")


# OLS fit of flipper length on species, body mass, and their interaction
# (the `*` in the formula yields both main effects and the interaction terms).
# Q(...) quotes column names that are not valid Python identifiers.
ols_formula = "Q('Flipper length (mm)') ~ Q('Species')*Q('Body mass (grams)')"
ols_model = smf.ols(ols_formula, data=df)
results = ols_model.fit()
print("Statsmodel Linear Regression Results: ")
print(results.summary())

Statsmodel Linear Regression Results: 
                               OLS Regression Results                               
====================================================================================
Dep. Variable:     Q('Flipper length (mm)')   R-squared:                       0.856
Model:                                  OLS   Adj. R-squared:                  0.854
Method:                       Least Squares   F-statistic:                     389.9
Date:                      Mon, 20 Dec 2021   Prob (F-statistic):          2.08e-135
Time:                              20:29:37   Log-Likelihood:                -1028.1
No. Observations:                       333   AIC:                             2068.
Df Residuals:                           327   BIC:                             2091.
Df Model:                                 5                                         
Covariance Type:                  nonrobust                                         
====================================================================================================================
                                                       coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------------------------------
Intercept                                          165.6032      3.619     45.756      0.000     158.483     172.723
Q('Species')[T.Chinstrap]                          -14.2224      7.339     -1.938      0.053     -28.660       0.215
Q('Species')[T.Gentoo]                               4.0640      6.195      0.656      0.512      -8.123      16.251
Q('Body mass (grams)')                               0.0066      0.001      6.820      0.000       0.005       0.009
Q('Species')[T.Chinstrap]:Q('Body mass (grams)')     0.0053      0.002      2.704      0.007       0.001       0.009
Q('Species')[T.Gentoo]:Q('Body mass (grams)')        0.0027      0.001      1.978      0.049    1.54e-05       0.005
==============================================================================
Omnibus:                        2.455   Durbin-Watson:                   1.756
Prob(Omnibus):                  0.293   Jarque-Bera (JB):                2.258
Skew:                          -0.122   Prob(JB):                        0.323
Kurtosis:                       3.322   Cond. No.                     1.38e+05
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.38e+05. This might indicate that there are
strong multicollinearity or other numerical problems.


# Residuals of the most recently fitted statsmodels OLS model.
interaction_model_resids = results.resid

# Violin plot of those residuals grouped by species, on a fresh figure.
fig, ax = plt.subplots()

sns.violinplot(x=df["Species"], y=interaction_model_resids, ax=ax)
ax.set_title('Residuals of Linear Regression between Body Mass and Flipper Length')

Text(0.5, 1.0, 'Residuals of Linear Regression between Body Mass and Flipper Length')


# OLS fit of flipper length on species, bill length, and their interaction
# (the `*` in the formula yields both main effects and the interaction terms).
# Q(...) quotes column names that are not valid Python identifiers.
ols_formula = "Q('Flipper length (mm)') ~ Q('Species')*Q('Bill length (mm)')"
ols_model = smf.ols(ols_formula, data=df)
results = ols_model.fit()
print("Statsmodel Linear Regression Results: ")
print(results.summary())

Statsmodel Linear Regression Results: 
                               OLS Regression Results                               
====================================================================================
Dep. Variable:     Q('Flipper length (mm)')   R-squared:                       0.831
Model:                                  OLS   Adj. R-squared:                  0.829
Method:                       Least Squares   F-statistic:                     322.5
Date:                      Mon, 20 Dec 2021   Prob (F-statistic):          4.74e-124
Time:                              20:29:37   Log-Likelihood:                -1054.8
No. Observations:                       333   AIC:                             2122.
Df Residuals:                           327   BIC:                             2144.
Df Model:                                 5                                         
Covariance Type:                  nonrobust                                         
===================================================================================================================
                                                      coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------------------------------
Intercept                                         158.5047      7.038     22.520      0.000     144.659     172.351
Q('Species')[T.Chinstrap]                         -11.8689     12.545     -0.946      0.345     -36.548      12.810
Q('Species')[T.Gentoo]                             -8.2555     10.801     -0.764      0.445     -29.503      12.992
Q('Bill length (mm)')                               0.8139      0.181      4.500      0.000       0.458       1.170
Q('Species')[T.Chinstrap]:Q('Bill length (mm)')     0.1934      0.279      0.694      0.488      -0.355       0.742
Q('Species')[T.Gentoo]:Q('Bill length (mm)')        0.5943      0.250      2.382      0.018       0.104       1.085
==============================================================================
Omnibus:                       11.317   Durbin-Watson:                   1.975
Prob(Omnibus):                  0.003   Jarque-Bera (JB):               18.403
Skew:                          -0.203   Prob(JB):                     0.000101
Kurtosis:                       4.078   Cond. No.                     2.32e+03
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.32e+03. This might indicate that there are
strong multicollinearity or other numerical problems.


# Residuals of the most recently fitted statsmodels OLS model.
interaction_model_resids = results.resid

# Violin plot of those residuals grouped by species, on a fresh figure.
fig, ax = plt.subplots()

sns.violinplot(x=df["Species"], y=interaction_model_resids, ax=ax)
ax.set_title('Residuals of Linear Regression between Bill Length and Flipper Length')

Text(0.5, 1.0, 'Residuals of Linear Regression between Bill Length and Flipper Length')


# OLS fit of bill length on species, body mass, and their interaction
# (the `*` in the formula yields both main effects and the interaction terms).
# Q(...) quotes column names that are not valid Python identifiers.
ols_formula = "Q('Bill length (mm)') ~ Q('Species')*Q('Body mass (grams)')"
ols_model = smf.ols(ols_formula, data=df)
results = ols_model.fit()
print("Statsmodel Linear Regression Results: ")
print(results.summary())

Statsmodel Linear Regression Results: 
                              OLS Regression Results                             
=================================================================================
Dep. Variable:     Q('Bill length (mm)')   R-squared:                       0.808
Model:                               OLS   Adj. R-squared:                  0.805
Method:                    Least Squares   F-statistic:                     275.3
Date:                   Mon, 20 Dec 2021   Prob (F-statistic):          7.30e-115
Time:                           20:29:37   Log-Likelihood:                -762.97
No. Observations:                    333   AIC:                             1538.
Df Residuals:                        327   BIC:                             1561.
Df Model:                              5                                         
Covariance Type:               nonrobust                                         
====================================================================================================================
                                                       coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------------------------------
Intercept                                           27.1129      1.632     16.609      0.000      23.902      30.324
Q('Species')[T.Chinstrap]                            5.0613      3.310      1.529      0.127      -1.451      11.573
Q('Species')[T.Gentoo]                              -0.5750      2.794     -0.206      0.837      -6.072       4.922
Q('Body mass (grams)')                               0.0032      0.000      7.228      0.000       0.002       0.004
Q('Species')[T.Chinstrap]:Q('Body mass (grams)')     0.0013      0.001      1.475      0.141      -0.000       0.003
Q('Species')[T.Gentoo]:Q('Body mass (grams)')        0.0010      0.001      1.558      0.120      -0.000       0.002
==============================================================================
Omnibus:                        5.384   Durbin-Watson:                   2.252
Prob(Omnibus):                  0.068   Jarque-Bera (JB):                5.657
Skew:                           0.207   Prob(JB):                       0.0591
Kurtosis:                       3.487   Cond. No.                     1.38e+05
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.38e+05. This might indicate that there are
strong multicollinearity or other numerical problems.


# Residuals of the most recently fitted statsmodels OLS model.
interaction_model_resids = results.resid

# Violin plot of those residuals grouped by species, on a fresh figure.
fig, ax = plt.subplots()

sns.violinplot(x=df["Species"], y=interaction_model_resids, ax=ax)
ax.set_title('Residuals of Linear Regression between Body Mass and Bill Length')

Text(0.5, 1.0, 'Residuals of Linear Regression between Body Mass and Bill Length')


## k-NN Classification

# Predictors: every column except the species label, as a 2-D array with one
# row per observed penguin (bill length, bill depth, flipper length, body
# mass, and sex).
feature_frame = df.drop(columns="Species")
X = feature_frame.to_numpy()

# Targets: the species name of each penguin, row-aligned with X.
y = df["Species"].to_numpy()


# Hyper-parameter grid for k-NN: number of neighbours and vote weighting.
params = {"n_neighbors": list(range(1, 11)), "weights": ["uniform", "distance"]}
k_neighbors = KNeighborsClassifier()

# Exhaustive search over the grid, scored by 10-fold cross-validated accuracy.
# NOTE(review): the features are on very different scales (body mass in grams
# vs. lengths in mm), so distances are dominated by body mass; consider
# standardizing the features before k-NN.
grid_search = GridSearchCV(estimator=k_neighbors, param_grid=params, cv=10, scoring='accuracy')
clf = grid_search.fit(X, y)

# Prints the best combination of parameters
print("k-NN best combination of parameters: ")
print(clf.best_params_)
print()

# Re-evaluate with the parameters the search actually selected, instead of a
# hard-coded copy that goes stale whenever the data or the grid changes.
clf = KNeighborsClassifier(**grid_search.best_params_)
scores = cross_val_score(clf, X, y, cv=10, scoring='accuracy')
print("k-NN Classification has %0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

k-NN best combination of parameters: 
{'n_neighbors': 1, 'weights': 'uniform'}

k-NN Classification has 0.86 accuracy with a standard deviation of 0.05


## Decision Tree (Classification)


# Feature and class names used to label the rendered decision tree.
# The class names are SORTED: plot_tree expects them in ascending order of the
# fitted classes, and the iteration order of a plain set is arbitrary, which
# could attach the wrong species name to a leaf.
col_names = list(df.drop(columns="Species").columns)
target_names = sorted(set(df["Species"].values))

# Hyper-parameter grid. "auto" is intentionally not listed for max_features:
# for classifiers it was an alias of "sqrt" and newer scikit-learn releases
# reject it.
params = {
    "criterion": ["gini", "entropy"],
    "splitter": ["best", "random"],
    "max_depth": list(range(1, 11)),
    "max_features": ["sqrt", "log2"],
}

decision_tree = DecisionTreeClassifier(random_state=0)

# Exhaustive search over the grid, scored by 10-fold cross-validated accuracy.
grid_search = GridSearchCV(estimator=decision_tree, param_grid=params, cv=10, scoring='accuracy')
clf = grid_search.fit(X, y)

# Prints the best combination of parameters
print("Decision Tree best combination of parameters: ")
print(clf.best_params_)
print()

# 10-fold cross validation with the parameters the search actually selected
# (rather than a hard-coded copy of one particular run's result).
clf = DecisionTreeClassifier(random_state=0, **grid_search.best_params_)

# Using accuracy as the scoring metric
scores = cross_val_score(clf, X, y, cv=10, scoring='accuracy')

# Fit on the full data set so there is a single tree to draw below.
clf.fit(X, y)

# Prints the mean and standard deviation of the decision tree accuracy
print("Decision Tree has %0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

fig = plt.figure(figsize=(25, 20))
tree.plot_tree(clf,
               feature_names=col_names,
               class_names=target_names,
               filled=True)

# NOTE(review): the file name is misspelled ("decistion"); it is left as-is
# because other material may reference this exact path.
fig.savefig("decistion_tree.png")

Decision Tree best combination of parameters: 
{'criterion': 'entropy', 'max_depth': 4, 'max_features': 'auto', 'splitter': 'best'}

Decision Tree has 0.95 accuracy with a standard deviation of 0.02


## Random Forest  (Classification)

# Hyper-parameter grid. "auto" is intentionally not listed for max_features:
# for classifiers it was an alias of "sqrt" and newer scikit-learn releases
# reject it.
params = {
    "criterion": ["gini", "entropy"],
    "max_depth": list(range(1, 9)),
    "max_features": ["sqrt", "log2"],
}

random_forest = RandomForestClassifier(random_state=0)

# Exhaustive search over the grid, scored by 10-fold cross-validated accuracy.
grid_search = GridSearchCV(estimator=random_forest, param_grid=params, cv=10, scoring='accuracy')
clf = grid_search.fit(X, y)

# Prints the best combination of parameters
print("Random Forest best combination of parameters: ")
print(clf.best_params_)
print()

# 10-fold cross validation with the parameters the search actually selected
# (rather than a hard-coded copy of one particular run's result).
clf = RandomForestClassifier(n_estimators=100, random_state=0, **grid_search.best_params_)

# Using accuracy as the scoring metric
scores = cross_val_score(clf, X, y, cv=10, scoring='accuracy')

# Prints the mean and standard deviation of the random forest accuracy
print("Random Forest has %0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

Random Forest best combination of parameters: 
{'criterion': 'entropy', 'max_depth': 7, 'max_features': 'auto'}

Random Forest has 0.98 accuracy with a standard deviation of 0.02

	Species	Bill length (mm)	Bill depth (mm)	Flipper length (mm)	Body mass (grams)	Sex
0	Adelie	39.1	18.7	181.0	3750.0	0
1	Adelie	39.5	17.4	186.0	3800.0	1
2	Adelie	40.3	18.0	195.0	3250.0	1
3	Adelie	36.7	19.3	193.0	3450.0	1
4	Adelie	39.3	20.6	190.0	3650.0	0

	species	island	culmen_length_mm	culmen_depth_mm	flipper_length_mm	body_mass_g	sex
0	Adelie	Torgersen	39.1	18.7	181.0	3750.0	MALE
1	Adelie	Torgersen	39.5	17.4	186.0	3800.0	FEMALE
2	Adelie	Torgersen	40.3	18.0	195.0	3250.0	FEMALE
3	Adelie	Torgersen	NaN	NaN	NaN	NaN	NaN
4	Adelie	Torgersen	36.7	19.3	193.0	3450.0	FEMALE

Waddle We Do Without Penguins: A Data Science Exploration Into Antarctica's Penguins¶

By: Daisy Liao¶

Navigation¶

Introduction¶

Data Collection¶

Exploratory Data Analysis¶

Intraspecies Gender Observations¶

Adelie Penguins¶

Bill Length¶

Bill depth¶

Flipper Length¶

Body Mass¶

Chinstrap Penguins¶

Bill length¶

Bill depth¶

Flipper length¶

Body mass¶

Gentoo Penguins¶

Bill length¶

Bill depth¶

Flipper length¶

Body mass¶

Interspecies Observations¶

Bill length¶

Bill depth¶

Flipper length¶

Body mass¶

Heatmap of Variables¶

Body Mass vs. Flipper Length¶

Body Mass vs. Bill Length¶

Bill Length vs. Flipper Length¶

Linear Regression Analysis¶

Body Mass vs Flipper Length¶

Violin Plot of Residuals¶

Bill Length vs Flipper Length¶

Violin Plot of Residuals¶

Body Mass vs Bill Length¶

Final Remarks¶

Machine Learning¶

k-NN Classification¶

k-NN Setup¶

Results¶

Decision Tree¶

Decision Tree Setup¶

Results¶

Random Forest¶

Setup¶

Results¶

Overall Remarks on Machine Learning¶

Conclusion¶

Further Reading¶