CS 230 Project, commit e36b81ea ("Initial commit"), authored 4 years ago by Toren Lev Fronsdal
baseline_model.py (new file, 597 additions, 0 deletions)

# %% [code]
#######################
### Library imports ###
#######################

# standard library
import os
import sys
import pickle
import copy

# data packages
import numpy as np
import pandas as pd

# pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# sklearn
import sklearn.base
import joblib
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import OneHotEncoder
from sklearn.decomposition import PCA
from sklearn.pipeline import make_union, make_pipeline
from sklearn.compose import make_column_transformer
from scipy.stats import kurtosis, skew

########################
### Global variables ###
########################

weight_method = None  # one of {"inverse freq", "square root"}
max_weight = None
n_seeds = 2
n_folds = 2
# if holdout_set == True, then a holdout set of the train data
# is used as the test set instead of the leaderboard data
holdout_set = False
split_method = "grouped"  # method: one of {"grouped", "target stratified"}

device = "cuda" if torch.cuda.is_available() else "cpu"
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# %% [code]
class
Preprocessor
(
TransformerMixin
):
def
__init__
(
self
,
variance_threshold
=
0.7
,
num_pc_genes
=
80
,
num_pc_cells
=
10
,
seed
=
2021
):
self
.
variance_threshold
=
variance_threshold
,
self
.
num_pc_genes
=
num_pc_genes
self
.
num_pc_cells
=
num_pc_cells
self
.
seed
=
seed
def
fit
(
self
,
X
,
y
=
None
,
X_test
=
None
):
if
X_test
is
not
None
:
X
=
pd
.
concat
([
X
,
X_test
],
axis
=
0
,
ignore_index
=
True
)
gene_feats
=
[
col
for
col
in
X
.
columns
if
col
.
startswith
(
'
g-
'
)]
cell_feats
=
[
col
for
col
in
X
.
columns
if
col
.
startswith
(
'
c-
'
)]
numeric_feats
=
gene_feats
+
cell_feats
categorical_feats
=
[
'
cp_time
'
,
'
cp_dose
'
]
self
.
_transformer
=
make_column_transformer
(
(
OneHotEncoder
(),
categorical_feats
)
)
self
.
_transformer
.
fit
(
X
)
return
self
def
transform
(
self
,
X
):
X_new
=
self
.
_transformer
.
transform
(
X
).
astype
(
"
float32
"
)
return
X_new
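
# --- Added illustration (not part of the original pipeline). On a toy frame with the
# --- MoA-style column names assumed here, Preprocessor one-hot encodes cp_time and
# --- cp_dose and drops the remaining columns (the make_column_transformer default).
_toy = pd.DataFrame({
    "cp_time": [24, 48, 72],
    "cp_dose": ["D1", "D2", "D1"],
    "g-0": [0.1, -0.2, 0.3],
    "c-0": [1.0, 0.5, -0.5],
})
print(Preprocessor().fit(_toy).transform(_toy))  # (3, 5) array: 3 cp_time levels + 2 cp_dose levels
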
# %% [code]
# Sub-class nn.Sequential to add a reset_parameters method
class Sequential(nn.Sequential):
    def reset_parameters(self):
        for layer in self.children():
            if hasattr(layer, "reset_parameters"):
                layer.reset_parameters()
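
# --- Added illustration (not part of the original pipeline): reset_parameters
# --- re-initializes every child layer that defines its own reset_parameters
# --- (e.g. Linear, BatchNorm1d); layers such as ReLU are skipped.
_tmp = Sequential(nn.Linear(4, 2))
_before = _tmp[0].weight.clone()
_tmp.reset_parameters()
print(torch.equal(_before, _tmp[0].weight))  # almost surely False after re-initialization
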
# %% [code]
class Network(sklearn.base.BaseEstimator):
    """
    An sklearn-compatible wrapper for pytorch estimators.

    Wraps pytorch training and prediction in an sklearn-compatible estimator with `fit`
    and `predict` methods and limited support for commonly-tuned net parameters. Supports
    early stopping and `eval_set` similarly to the LightGBM sklearn implementation.

    Parameters
    ----------
    net_obj : obj
        The instantiated pytorch network object to be used in training. Should be an
        instance of a subclass of nn.Module.
    seed : int, optional
        Seed used for randomness in network initialization, for reproducibility.
    optimizer : type, optional, default=torch.optim.Adam
        The optimizer class to be used in training. Should be a subclass of
        torch.optim.Optimizer.
    loss_fn : callable, optional, default=nn.BCEWithLogitsLoss()
        A function or callable loss object with signature `f(y_pred, y_true)`.
    device : {"cpu", "cuda"}, optional, default="cpu"
        The device used in training.
    lr : float, optional, default=0.001
        The learning rate passed to the optimizer. May be adjusted during training if
        `lr_scheduler` is provided.
    weight_decay : float, optional, default=0
        Weight decay parameter used for network weight regularization.
    batch_size : int, optional, default=128
        Batch size used in training.
    max_epochs : int, optional, default=10
        Maximum number of epochs used in training. The actual number of epochs may be
        lower if early stopping is enabled.
    lr_scheduler : {"OneCycleLR", "ReduceLROnPlateau"}, optional
        Name of the torch.optim.lr_scheduler class to use during training.
    lr_scheduler_params : dict, optional
        The parameters used to initialize the `lr_scheduler`.

    Attributes
    ----------
    metric_history_ : list of dict
        A list of dictionaries recording the value of each metric, for every eval_set
        and epoch.
    early_stopping_history_ : list of float
        The list of values for only the first metric and eval_set, used for early
        stopping if specified.
    early_stopping_epoch_ : int or None
        The optimal epoch chosen by early stopping.
    net_ : obj
        The trained `net_obj`, which is used for prediction.
    metric_history_df_ : pd.DataFrame
        A dataframe wrapper around `metric_history_`.
    """
    def __init__(
        self,
        net_obj,
        seed=None,
        optimizer=torch.optim.Adam,
        loss_fn=None,
        device="cpu",
        lr=0.001,
        weight_decay=0,
        batch_size=128,
        max_epochs=10,
        lr_scheduler=None,
        lr_scheduler_params=None,
    ):
        self.net_obj = net_obj
        self.seed = seed
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.device = device
        self.lr = lr
        self.weight_decay = weight_decay
        self.batch_size = batch_size
        self.max_epochs = max_epochs
        self.lr_scheduler = lr_scheduler
        self.lr_scheduler_params = lr_scheduler_params

    def fit(
        self,
        X,
        y,
        eval_set=None,
        eval_names=None,
        eval_metric=None,
        patience=None,
        min_delta=None,
        verbose=False,
    ):
        """
        Trains the network, with support for early stopping.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training features.
        y : array-like, shape (n_samples, n_labels)
            The training labels.
        eval_set : list of tuple, optional
            A list of (X, y) tuples to be used for computing eval loss. At least one
            dataset is required for early stopping, in which case the first tuple in the
            list is used for early stopping evaluation.
        eval_names : list, optional
            An optional list of the same length as `eval_set`, specifying the name of
            each dataset.
        eval_metric : list of callable, optional
            A list of metric functions to be used in evaluation. Loss will be recorded
            for all metrics, but only the first metric provided will be used for early
            stopping, if enabled. Each should have signature `f(y_pred, y_true)`.
        patience : int, optional
            The number of epochs without a new minimum loss allowed before stopping
            early. The count is reset whenever the loss reaches a new minimum. This
            parameter may be overridden if `min_delta` is also set.
        min_delta : float, optional
            The minimum decrease in loss required to continue training. If loss does not
            decrease by more than this value, training is stopped early and the
            stopping epoch is recorded in the `early_stopping_epoch_` attribute.
        verbose : int or bool, optional, default=False
            If False, no loss is printed during training. Otherwise, results are printed
            after every `verbose` epochs.

        Returns
        -------
        self : obj
            Returns the estimator itself, in keeping with sklearn requirements.
        """
        # initialize device for training
        device = torch.device(self.device)

        # set seed for network weight initialization
        if self.seed is not None:
            torch.manual_seed(self.seed)
            # this should be redundant with torch.manual_seed
            torch.cuda.manual_seed_all(self.seed)

        # send pytorch network to specified device
        net = self.net_obj.to(device)
        # reset initial parameters for net
        net.reset_parameters()

        # initialize loss and optimizer
        if self.loss_fn is None:
            loss_fn = nn.BCEWithLogitsLoss()
        else:
            loss_fn = self.loss_fn
        optimizer = self.optimizer(
            net.parameters(), lr=self.lr, weight_decay=self.weight_decay
        )

        # helper for converting to tensor
        def to_tensor(a):
            return torch.tensor(a, dtype=torch.float32).to(device)

        X_nn = to_tensor(X)
        y_nn = to_tensor(y)
        # Add extra dimension if y is single dimensional
        if len(y_nn.shape) == 1:
            y_nn = y_nn.unsqueeze(1)

        # set up for evaluation on train/val data
        if eval_set is None:
            eval_set = []
        if eval_metric is None:
            eval_metric = []
        if eval_names is None:
            eval_names = [f"eval_{i}" for i in range(len(eval_set))]
        # add train data as an eval set
        eval_set.append((X, y))
        eval_names.append("train")
        # convert eval sets to tensors
        eval_set = [(to_tensor(tup[0]), to_tensor(tup[1])) for tup in eval_set]
        eval_metric = [
            m if isinstance(m, tuple) else (f"metric_{i}", m)
            for i, m in enumerate(eval_metric)
        ]
        eval_metric.append(("objective", loss_fn))

        # set up dataloader for batches
        dataset = torch.utils.data.TensorDataset(X_nn, y_nn)
        dataloader = torch.utils.data.DataLoader(
            dataset, batch_size=self.batch_size, shuffle=True, drop_last=True
        )
        lr_scheduler = self.lr_scheduler
        if self.lr_scheduler_params is None:
            lr_scheduler_params = {}
        else:
            lr_scheduler_params = self.lr_scheduler_params
        if lr_scheduler == "OneCycleLR":
            one_cycle_lr = optim.lr_scheduler.OneCycleLR(
                optimizer=optimizer,
                epochs=self.max_epochs,
                steps_per_epoch=len(dataloader),
                **lr_scheduler_params,
            )
        if lr_scheduler == "ReduceLROnPlateau":
            reduce_lr_on_plateau = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer=optimizer, **lr_scheduler_params
            )
        # track number of epochs with increasing loss for early stopping
        self.metric_history_ = []
        self.early_stopping_history_ = []
        self.early_stopping_epoch_ = None
        min_valmetric = np.inf
        increases = 0
        best_params = copy.deepcopy(net.state_dict())

        for epoch in range(self.max_epochs):
            # construct batches
            for batch_x, batch_y in dataloader:
                net.train()
                # zero gradients to start
                optimizer.zero_grad()
                output = net.forward(batch_x)
                loss = loss_fn(output, batch_y)
                loss.backward()
                optimizer.step()
                if lr_scheduler == "OneCycleLR":
                    one_cycle_lr.step()

            net.eval()
            # record metrics for each eval set
            for i in range(len(eval_set)):
                X_val, y_val = eval_set[i]
                # Add extra dimension if y is single dimensional
                if len(y_val.shape) == 1:
                    y_val = y_val.unsqueeze(1)
                set_name = eval_names[i]
                with torch.no_grad():
                    preds = net(X_val)
                for j in range(len(eval_metric)):
                    metric_name, metric_fn = eval_metric[j]
                    metric_val = metric_fn(preds, y_val)
                    if isinstance(metric_val, torch.Tensor):
                        metric_val = metric_val.item()
                    row = {
                        "epoch": epoch,
                        "data": set_name,
                        "metric": metric_name,
                        "value": metric_val,
                    }
                    self.metric_history_.append(row)
                    # use first val set and first metric for early stopping
                    if i == 0 and j == 0:
                        self.early_stopping_history_.append(metric_val)

            if verbose:
                verbose = int(verbose)
                if epoch % verbose == 0:
                    for d in self.metric_history_[-len(eval_set) * len(eval_metric):]:
                        print(d)

            # if val set is present, record history and follow early stopping parameters
            if len(eval_set) > 1:
                val_metric = self.early_stopping_history_[-1]
                if lr_scheduler == "ReduceLROnPlateau":
                    reduce_lr_on_plateau.step(val_metric)

                # early stopping based on minimum decrease in loss
                if min_delta is not None and epoch > 0:
                    if self.early_stopping_history_[-2] - val_metric < min_delta:
                        print("Early stopping at epoch", epoch)
                        self.early_stopping_epoch_ = epoch
                        break

                # early stopping based on number of epochs with increasing loss
                if val_metric < min_valmetric:
                    min_valmetric = val_metric
                    increases = 0
                    # save model parameters for current best epoch
                    best_params = copy.deepcopy(net.state_dict())
                elif patience is not None:
                    increases += 1
                    if increases > patience:
                        print("Early stopping at epoch", epoch)
                        self.early_stopping_epoch_ = epoch
                        break

        # if using early stopping, reload net with best params
        if patience is not None or min_delta is not None:
            # load model parameters from best epoch
            net.load_state_dict(best_params)
        net.eval()

        # store network for prediction
        self.net_ = net
        self.metric_history_df_ = pd.DataFrame(self.metric_history_)
        return self

    def predict(self, X):
        """
        Predicts using the trained network.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            An array with the same features as the one used in training, containing the
            data to be used for predictions.

        Returns
        -------
        np.array
            Model predictions in an array of shape (n_samples, n_labels).
        """
        # cast to tensor and move to device
        device = torch.device(self.device)
        X_nn = torch.tensor(X, dtype=torch.float32).to(device)
        # forward pass through network for predictions;
        # return predictions as a numpy array
        with torch.no_grad():
            return self.net_(X_nn).cpu().detach().numpy().astype("float32")

    def predict_proba(self, X):
        """
        Returns predictions on the probability scale for a classification network.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            An array with the same features as the one used in training, containing the
            data to be used for predictions.

        Returns
        -------
        np.array
            Model predictions in an array of shape (n_samples, n_labels), with the
            sigmoid transformation applied (i.e. predicted probabilities).
        """
        preds = self.predict(X)
        preds_proba = 1 / (1 + np.exp(-preds))
        return preds_proba.astype("float32")
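
# --- Added illustration (not part of the original pipeline): a minimal sketch of how the
# --- Network wrapper is meant to be used with an eval_set and patience-based early
# --- stopping. The synthetic data and the `_demo_*` names are assumptions made purely
# --- for demonstration; the real training run is at the bottom of this file.
_demo_rng = np.random.RandomState(0)
_demo_X = _demo_rng.rand(256, 8).astype("float32")
_demo_y = (_demo_rng.rand(256, 3) > 0.5).astype("float32")
_demo_net = Network(
    net_obj=Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 3)),
    seed=0,
    max_epochs=3,
    batch_size=64,
    device="cpu",
)
_demo_net.fit(
    _demo_X[:200],
    _demo_y[:200],
    eval_set=[(_demo_X[200:], _demo_y[200:])],
    patience=2,
)
print(_demo_net.metric_history_df_.head())
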
# %% [code]
class SmoothCrossEntropyLoss(nn.modules.loss._WeightedLoss):
    """
    Computes smoothed cross entropy (log) loss.

    Label smoothing shrinks the true label values toward a uniform distribution based on
    a specified smoothing parameter, e.g., with smoothing == 0.001 and n_classes == 2,
    [0, 1] --> [0.0005, 0.9995].
    The formula is: smoothed y = y * (1 - smoothing) + smoothing / n_classes.
    This method can help prevent models from becoming over-confident.
    See paper: https://papers.nips.cc/paper/2019/file/f1748d6b0fd9d439f71450117eba2725-Paper.pdf
    """

    def __init__(self, weight=None, reduction="mean", smoothing=0.001, device="cpu"):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight
        self.device = device

    @staticmethod
    def _smooth(targets, n_classes, smoothing, device):
        """
        Helper for computing smoothed label values.
        """
        assert 0 <= smoothing <= 1
        with torch.no_grad():
            targets = (
                targets * (1 - smoothing)
                + torch.ones_like(targets).to(device) * smoothing / n_classes
            )
        return targets

    def forward(self, inputs, targets, sample_weight=None):
        # smooth targets
        targets = self._smooth(targets, 2, self.smoothing, self.device)
        # weight class predictions
        if self.weight is not None:
            inputs = inputs * self.weight.unsqueeze(0)
        if sample_weight is None:
            # binary_cross_entropy_with_logits returns mean log loss
            loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="mean")
        else:
            # binary_cross_entropy_with_logits returns
            # [# obs., # classes] tensor of log losses
            loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none")
            assert loss.size(0) == sample_weight.size(0)
            # compute weighted mean for each target
            loss = torch.sum(loss * sample_weight, dim=0) / torch.sum(sample_weight)
            # compute column-wise mean
            loss = torch.mean(loss)
        return loss
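
# --- Added numeric check (not in the original): with smoothing = 0.001 and n_classes = 2,
# --- the smoothing formula maps the hard labels [0, 1] to
# --- [0 * 0.999 + 0.0005, 1 * 0.999 + 0.0005] = [0.0005, 0.9995].
print(SmoothCrossEntropyLoss._smooth(
    torch.tensor([0.0, 1.0]), n_classes=2, smoothing=0.001, device="cpu"
))  # ≈ tensor([0.0005, 0.9995])
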
class ClippedCrossEntropyLoss(nn.modules.loss._WeightedLoss):
    """
    Computes clipped cross entropy (log) loss.

    Clipped log loss clips the predicted probabilities to [smoothing, 1 - smoothing],
    e.g., with smoothing == 0.001, the predicted probabilities [.000013, .99992]
    --> [0.001, 0.999].
    This method can help prevent models from becoming over-confident.
    """

    def __init__(self, weight=None, reduction="mean", smoothing=0.001):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight

    def forward(self, y_pred, y_true, sample_weight=None):
        # weight class predictions (applied to the logits, before clipping)
        if self.weight is not None:
            y_pred = y_pred * self.weight.unsqueeze(0)
        # clip predictions
        y_pred_clipped = torch.clamp(torch.sigmoid(y_pred), self.smoothing, 1 - self.smoothing)
        if sample_weight is None:
            # binary_cross_entropy returns mean log loss
            loss = F.binary_cross_entropy(y_pred_clipped, y_true, reduction="mean")
        else:
            # binary_cross_entropy returns [# obs., # classes] tensor of log losses
            loss = F.binary_cross_entropy(y_pred_clipped, y_true, reduction="none")
            assert loss.size(0) == sample_weight.size(0)
            # compute weighted mean for each target
            loss = torch.sum(loss * sample_weight, dim=0) / torch.sum(sample_weight)
            # compute mean across targets
            loss = torch.mean(loss)
        return loss
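
# --- Added numeric check (not in the original): with smoothing = 0.001, predicted
# --- probabilities are clamped into [0.001, 0.999] before the log loss is taken.
print(torch.clamp(torch.tensor([0.000013, 0.99992]), 0.001, 1 - 0.001))  # ≈ tensor([0.0010, 0.9990])
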
# %% [code]
###################
### Import Data ###
###################

train_drug = pd.read_csv("../input/lish-moa/train_drug.csv")
X = pd.read_csv("../input/lish-moa/train_features.csv")
y = pd.read_csv("../input/lish-moa/train_targets_scored.csv")
X_test = pd.read_csv("../input/lish-moa/test_features.csv")
submission = pd.read_csv("../input/lish-moa/sample_submission.csv")

# Remove control observations
y = y.loc[X["cp_type"] == "trt_cp"].reset_index(drop=True)
X = X.loc[X["cp_type"] == "trt_cp"].reset_index(drop=True)

# used to set control obs. to zero for preds
X_test_copy = X_test.copy()

# %% [code]
transformer = Preprocessor()
transformer.fit(X)
X = transformer.transform(X)
y = y.drop(["sig_id"], axis=1).values.astype("float32")

# %% [code]
n_input = X.shape[1]
n_output = y.shape[1]
hidden_units = 640
dropout = 0.2

net_obj = Sequential(
    nn.BatchNorm1d(n_input),
    nn.Dropout(dropout),
    nn.Linear(n_input, hidden_units),
    nn.ReLU(),
    nn.BatchNorm1d(hidden_units),
    nn.Dropout(dropout),
    nn.Linear(hidden_units, hidden_units),
    nn.ReLU(),
    nn.BatchNorm1d(hidden_units),
    nn.Dropout(dropout),
    nn.Linear(hidden_units, n_output),
)

# %% [code]
# zero the submission preds
submission.iloc[:, 1:207] = 0

net = Network(
    net_obj=net_obj,
    max_epochs=6,
    batch_size=128,
    device=device,
    loss_fn=SmoothCrossEntropyLoss(smoothing=0.001, device=device),
    lr=0.001,
    weight_decay=1e-6,
    lr_scheduler="ReduceLROnPlateau",
)
clipped_log_loss = ClippedCrossEntropyLoss(smoothing=0.001)

net.fit(
    X=X,
    y=y,
    eval_metric=[clipped_log_loss],
    patience=7,
    verbose=2,
)
net.predict_proba(X)

# %% [code]