import numpy as np
import pandas as pd
import tensorflow as tf

# Show which TensorFlow release is installed (a bare expression, so it is
# only displayed when run as the last statement of a notebook cell).
tf.__version__

'2.20.0'

# Read the churn dataset from disk and preview its first ten rows.
df = pd.read_csv('Churn_Modelling.csv')
df.head(n=10)

# Features: every column from position 3 up to (but not including) the last.
# Target: the last column.
X = df.iloc[:, 3:-1].to_numpy()
y = df.iloc[:, -1].to_numpy()

# Peek at the first ten target labels.
y[:10]

array([1, 0, 1, 0, 0, 1, 0, 1, 0, 0])

# Peek at the first ten feature rows.
X[:10]

array([[619, 'France', 'Female', 42, 2, 0.0, 1, 1, 1, 101348.88],
       [608, 'Spain', 'Female', 41, 1, 83807.86, 1, 0, 1, 112542.58],
       [502, 'France', 'Female', 42, 8, 159660.8, 3, 1, 0, 113931.57],
       [699, 'France', 'Female', 39, 1, 0.0, 2, 0, 0, 93826.63],
       [850, 'Spain', 'Female', 43, 2, 125510.82, 1, 1, 1, 79084.1],
       [645, 'Spain', 'Male', 44, 8, 113755.78, 2, 1, 0, 149756.71],
       [822, 'France', 'Male', 50, 7, 0.0, 2, 1, 1, 10062.8],
       [376, 'Germany', 'Female', 29, 4, 115046.74, 4, 1, 0, 119346.88],
       [501, 'France', 'Male', 44, 4, 142051.07, 2, 0, 1, 74940.5],
       [684, 'France', 'Male', 27, 2, 134603.88, 1, 1, 1, 71725.73]],
      dtype=object)

from sklearn.preprocessing import LabelEncoder

# Column 2 of X holds the gender strings; learn the label -> integer mapping
# and then overwrite the column in place with the integer codes.
le = LabelEncoder()
le.fit(X[:, 2])
X[:, 2] = le.transform(X[:, 2])

# Show the first ten rows to confirm the column is now numeric.
print(X[:10])

[[619 'France' 0 42 2 0.0 1 1 1 101348.88]
 [608 'Spain' 0 41 1 83807.86 1 0 1 112542.58]
 [502 'France' 0 42 8 159660.8 3 1 0 113931.57]
 [699 'France' 0 39 1 0.0 2 0 0 93826.63]
 [850 'Spain' 0 43 2 125510.82 1 1 1 79084.1]
 [645 'Spain' 1 44 8 113755.78 2 1 0 149756.71]
 [822 'France' 1 50 7 0.0 2 1 1 10062.8]
 [376 'Germany' 0 29 4 115046.74 4 1 0 119346.88]
 [501 'France' 1 44 4 142051.07 2 0 1 74940.5]
 [684 'France' 1 27 2 134603.88 1 1 1 71725.73]]

from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# One-hot encode column 1 (the country names); every other column is passed
# through untouched. The encoded columns end up at the front of the result.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [1])],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))

# Show the first ten rows to confirm the new column layout.
print(X[:10])

[[1.0 0.0 0.0 619 0 42 2 0.0 1 1 1 101348.88]
 [0.0 0.0 1.0 608 0 41 1 83807.86 1 0 1 112542.58]
 [1.0 0.0 0.0 502 0 42 8 159660.8 3 1 0 113931.57]
 [1.0 0.0 0.0 699 0 39 1 0.0 2 0 0 93826.63]
 [0.0 0.0 1.0 850 0 43 2 125510.82 1 1 1 79084.1]
 [0.0 0.0 1.0 645 1 44 8 113755.78 2 1 0 149756.71]
 [1.0 0.0 0.0 822 1 50 7 0.0 2 1 1 10062.8]
 [0.0 1.0 0.0 376 0 29 4 115046.74 4 1 0 119346.88]
 [1.0 0.0 0.0 501 1 44 4 142051.07 2 0 1 74940.5]
 [1.0 0.0 0.0 684 1 27 2 134603.88 1 1 1 71725.73]]

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply that
# same transformation to both splits so no test-set information is used
# when fitting.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# Network layout: two hidden layers of 6 ReLU units each, followed by a
# single sigmoid unit that outputs a probability.
ann = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=6, activation='relu'),
    tf.keras.layers.Dense(units=6, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

# Binary classification setup: Adam optimizer, binary cross-entropy loss,
# accuracy reported as the training metric.
ann.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

from keras.callbacks import Callback

class InlineLogger(Callback):
    """Print a single, self-overwriting progress line while a model trains.

    After each epoch the current terminal line is rewritten (leading carriage
    return, no trailing newline) with the epoch counter, loss and accuracy.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Rewrite the progress line with the metrics of the finished epoch.

        Args:
            epoch: Zero-based index of the epoch that just ended.
            logs: Mapping of metric name to value for this epoch; a missing
                mapping or a missing key is reported as 0.
        """
        logs = logs or {}
        # Keras records the fit() settings on each callback as ``params``.
        # Reading the total from there keeps the counter correct for any
        # ``epochs`` value instead of assuming 100; "?" is shown if the
        # total is not available.
        total_epochs = (getattr(self, "params", None) or {}).get("epochs") or "?"
        print(
            f"\rEpoch {epoch + 1}/{total_epochs} "
            f"- loss: {logs.get('loss', 0):.4f} "
            f"- accuracy: {logs.get('accuracy', 0):.4f}",
            end="",
            # Without a newline the line may sit in the output buffer; flush
            # so progress is visible as training runs.
            flush=True,
        )

    def on_train_end(self, logs=None):
        """End the progress line so later output starts on a fresh line."""
        print()

# Train for 100 epochs in mini-batches of 32. Keras' built-in progress output
# is switched off (verbose=0) so the InlineLogger callback is the only
# reporter.
ann.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=100,
    verbose=0,
    callbacks=[InlineLogger()],
)

Epoch 100/100 - loss: 0.3310 - accuracy: 0.8630

<keras.src.callbacks.history.History at 0x1a24fb8a990>

# Score one hand-written customer. The values follow the encoded column
# order: three one-hot geography flags, CreditScore, Gender, Age, Tenure,
# Balance, NumOfProducts, HasCrCard, IsActiveMember, EstimatedSalary.
# The row is scaled with the already-fitted scaler before prediction, and the
# predicted probability is thresholded at 0.5.
print(
    ann.predict(
        sc.transform([[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])
    ) > 0.5
)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 74ms/step
[[False]]

# Predict probabilities for the test rows and turn them into boolean class
# predictions by thresholding at 0.5.
y_pred = ann.predict(X_test) > 0.5
# Optional side-by-side view of predictions and true labels:
#print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

63/63 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step

from sklearn.metrics import accuracy_score, confusion_matrix

# Tabulate true labels against the thresholded predictions for the test set.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[1504   91]
 [ 191  214]]

# Accuracy: share of test rows whose predicted class equals the true class.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.859

# Artificial Neural Network

### Importing the libraries

## Part 1 - Data Preprocessing

### Importing the dataset

### Encoding categorical data

### Splitting the dataset into the Training set and Test set

### Feature Scaling

## Part 2 - Building the ANN

### Initializing the ANN

### Adding the input layer and the first hidden layer

### Adding the second hidden layer

### Adding the output layer

## Part 3 - Training the ANN

### Compiling the ANN

### Training the ANN on the Training set

## Part 4 - Making the predictions and evaluating the model

### Predicting the result of a single observation

### Predicting the Test set results

### Confusion Matrix

	RowNumber	CustomerId	Surname	CreditScore	Geography	Gender	Age	Tenure	Balance	NumOfProducts	HasCrCard	IsActiveMember	EstimatedSalary	Exited
0	1	15634602	Hargrave	619	France	Female	42	2	0.00	1	1	1	101348.88	1
1	2	15647311	Hill	608	Spain	Female	41	1	83807.86	1	0	1	112542.58	0
2	3	15619304	Onio	502	France	Female	42	8	159660.80	3	1	0	113931.57	1
3	4	15701354	Boni	699	France	Female	39	1	0.00	2	0	0	93826.63	0
4	5	15737888	Mitchell	850	Spain	Female	43	2	125510.82	1	1	1	79084.10	0
5	6	15574012	Chu	645	Spain	Male	44	8	113755.78	2	1	0	149756.71	1
6	7	15592531	Bartlett	822	France	Male	50	7	0.00	2	1	1	10062.80	0
7	8	15656148	Obinna	376	Germany	Female	29	4	115046.74	4	1	0	119346.88	1
8	9	15792365	He	501	France	Male	44	4	142051.07	2	0	1	74940.50	0
9	10	15592389	H?	684	France	Male	27	2	134603.88	1	1	1	71725.73	0