Regression - TensorFlow#
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers

# print floats plainly instead of in scientific notation
np.set_printoptions(precision=3, suppress=True)

print(tf.__version__)
2023-06-01 12:47:04.882786: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-01 12:47:04.946947: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2.11.1
Create Data#
# 100 samples of 11 standard-normal features, one column per letter
np.random.seed(0)
cols = "abcdefghijk"
df = pd.DataFrame({
    col: np.random.normal(0, 1, 100)
    for col in cols
})
# y is a noiseless linear combination of the features
coefs = np.round(np.random.uniform(2, 10, len(cols)), 0)
print(coefs)
df["y"] = np.array(df) @ coefs
[6. 3. 5. 8. 4. 3. 9. 8. 7. 8. 9.]
df.head()
|   | a | b | c | d | e | f | g | h | i | j | k | y |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.764052 | 1.883151 | -0.369182 | -1.306527 | -0.598654 | 0.382732 | -1.550429 | -1.444940 | 1.411172 | -1.461733 | 0.555963 | -19.636155 |
| 1 | 0.400157 | -1.347759 | -0.239379 | 1.658131 | -1.115897 | -0.034242 | 0.417319 | -1.210543 | 0.785804 | -0.683440 | 0.892474 | 7.996400 |
| 2 | 0.978738 | -1.270485 | 1.099660 | -0.118164 | 0.766663 | 1.096347 | -0.944368 | -0.788669 | -0.057470 | 0.367545 | -0.422315 | -3.101780 |
| 3 | 2.240893 | 0.969397 | 0.655264 | -0.680178 | 0.356293 | -0.234216 | 0.238103 | 1.094638 | -0.391217 | 0.190312 | 0.104714 | 25.537401 |
| 4 | 1.867558 | -1.173123 | 0.640132 | 0.666383 | -1.768538 | -0.347451 | -1.405963 | 0.234822 | 0.940918 | -0.851729 | 0.228053 | -0.848830 |
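As a quick sanity check, the first row of the table reproduces its y value by hand:
row0 = df.iloc[0, :len(cols)].to_numpy()  # features a through k for row 0
print(row0 @ coefs)                       # -19.636..., matches df["y"].iloc[0]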
Split Train/Test#
train, test = train_test_split(df, test_size=0.2, random_state=0)
# pop() removes "y" from the feature frames and returns it as the label series
train_labels = train.pop("y")
test_labels = test.pop("y")
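A quick shape check confirms the 80/20 split of the 100 rows:
print(train.shape, test.shape)                # (80, 11) (20, 11)
print(train_labels.shape, test_labels.shape)  # (80,) (20,)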
Single Layer#
A “Dense” layer implements activation(dot(input, kernel) + bias), where:

- activation is the element-wise activation function
- kernel is the weights matrix created by the layer
- bias is applied only if use_bias is True
The model is:

\[y = WX + b\]

where, for the 80 training samples:

- y is [1, 80]
- W is [1, 11]
- X is [11, 80]
- b is [1, 1], duplicated to [1, 80] by broadcasting
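To make the shapes concrete, here is a minimal NumPy sketch of the same computation, with made-up values for W, X, and b:
W = np.random.normal(size=(1, 11))   # one weight per feature
X = np.random.normal(size=(11, 80))  # 80 training samples as columns
b = np.random.normal(size=(1, 1))    # single bias term
y = W @ X + b                        # b broadcasts from (1, 1) to (1, 80)
print(y.shape)                       # (1, 80)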
# build the model
# normalization optional for this simple model
# normalizer = layers.Normalization(axis=-1)
linear_model = tf.keras.Sequential([
    # normalizer,
    layers.Dense(units=1, use_bias=True)
])
# print model summary (calling predict first builds the model so layer shapes are known)
linear_model.predict(train)
linear_model.summary()
3/3 [==============================] - 0s 928us/step
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 1) 12
=================================================================
Total params: 12
Trainable params: 12
Non-trainable params: 0
_________________________________________________________________
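With no activation specified, the layer reduces to dot(input, kernel) + bias, which can be reproduced by hand. A minimal sketch using the still-untrained layer:
W = linear_model.layers[0].kernel.numpy()  # shape (11, 1)
b = linear_model.layers[0].bias.numpy()    # shape (1,)
manual = train.to_numpy() @ W + b          # dot(input, kernel) + bias
keras_out = linear_model.predict(train, verbose=0)
print(np.allclose(manual, keras_out, atol=1e-5))  # True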
# configure the training procedure
linear_model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error'
)
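For reference, mean_absolute_error averages |y_true - y_pred|; a quick check with made-up values:
y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.5, 2.0, 2.0])
print(np.mean(np.abs(y_true - y_pred)))                             # 0.5
print(tf.keras.losses.MeanAbsoluteError()(y_true, y_pred).numpy())  # 0.5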
%%time
# train the model
epochs = 100
history = linear_model.fit(
    train,
    train_labels,
    epochs=epochs,
    # suppress per-epoch logging
    verbose=0,
    # compute validation results on 20% of the training data
    validation_split=0.2
)
CPU times: user 1.33 s, sys: 125 ms, total: 1.45 s
Wall time: 1.33 s
# history.history holds per-epoch 'loss' and 'val_loss'
dfhist = pd.DataFrame(history.history)
dfhist["epoch"] = history.epoch
sns.lineplot(x=dfhist["epoch"], y=dfhist["loss"])
sns.lineplot(x=dfhist["epoch"], y=dfhist["val_loss"])
(plot of training and validation loss by epoch)
# predict with linear_model.predict(train)

# compare the dense layer's learned weights against the true coefficients
# note: if normalizing, the dense layer is linear_model.layers[1] instead
print(linear_model.layers[0].kernel)
print(coefs)
<tf.Variable 'dense/kernel:0' shape=(11, 1) dtype=float32, numpy=
array([[5.986],
[3.005],
[5.009],
[7.974],
[4.027],
[3.012],
[8.996],
[8.019],
[6.976],
[7.999],
[9.012]], dtype=float32)>
[6. 3. 5. 8. 4. 3. 9. 8. 7. 8. 9.]
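The agreement can also be checked numerically; a minimal sketch comparing the flattened kernel against coefs:
fitted = linear_model.layers[0].kernel.numpy().ravel()
print(np.abs(fitted - coefs).max())  # ~0.03 for this run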
Deep Neural Network#
Simply add more layers when building the model.
Note that the learned weights will no longer correspond to the true coefficients.
%%time
# normalizer = layers.Normalization(axis=-1)
dnn_model = tf.keras.Sequential([
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(units=1, use_bias=True)
])

dnn_model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error'
)

epochs = 100
history = dnn_model.fit(
    train,
    train_labels,
    epochs=epochs,
    # suppress per-epoch logging
    verbose=0,
    # compute validation results on 20% of the training data
    validation_split=0.2
)
CPU times: user 1.37 s, sys: 272 ms, total: 1.64 s
Wall time: 1.45 s
dfhist = pd.DataFrame(history.history)
dfhist["epoch"] = history.epoch
sns.lineplot(x=dfhist["epoch"], y=dfhist["loss"])
sns.lineplot(x=dfhist["epoch"], y=dfhist["val_loss"])
(plot of training and validation loss by epoch)
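Neither model has seen the held-out test split yet. A minimal sketch comparing test MAE, assuming both linear_model and dnn_model are still in scope:
for name, model in [("linear", linear_model), ("dnn", dnn_model)]:
    mae = model.evaluate(test, test_labels, verbose=0)
    print(f"{name}: test MAE = {mae:.3f}")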