Skip to content

Instantly share code, notes, and snippets.

@alexcasalboni
Last active June 29, 2017 09:46
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save alexcasalboni/8acc123d64118813d4d87428aed2f5ea to your computer and use it in GitHub Desktop.
Let's build a scikit-learn model
sklearn
numpy
scipy
matplotlib
pandas
import os
import pickle
import gzip
from sklearn import svm
from sklearn.metrics import confusion_matrix
import numpy as np
import pandas as pd
def main():
    """Load the dataset, obtain a model and evaluate it.

    The split index is placed at 70% of the rows; it is handed to both the
    model loader and the evaluation step so they agree on the boundary.
    """
    # pandas decompresses the gzip archive while parsing the CSV.
    samples = pd.read_csv('zipped_dataset.csv.gz', compression='gzip')

    row_count = samples.shape[0]
    split_at = int(row_count * 0.7)  # 70/30 split boundary

    classifier = load_model('trained_model.pkl', samples, split_at)
    test_model(classifier, samples, split_at)
def load_model(filename, dataset, N):
    """Return the model cached at ``filename``, training and caching it if absent.

    Args:
        filename: Path of the gzip-compressed pickle holding the model.
        dataset: Data passed to ``train_model`` when no cached file exists.
        N: Split index passed to ``train_model`` when no cached file exists.

    Returns:
        The unpickled model if ``filename`` exists, otherwise a freshly
        trained model (which is also written to ``filename``).
    """
    if os.path.isfile(filename):
        # NOTE(security): unpickling executes arbitrary code embedded in the
        # file. Only point this at cache files this script wrote itself.
        with gzip.open(filename, 'rb') as f:
            return pickle.load(f)

    model = train_model(dataset, N)

    # Dump into a sibling temp file and rename it into place afterwards, so an
    # interrupted run never leaves a truncated cache that later runs would
    # fail to unpickle.
    partial = '{0}.tmp'.format(filename)
    try:
        with gzip.open(partial, 'wb') as f:
            pickle.dump(model, f)
        os.rename(partial, filename)
    finally:
        # After a successful rename the temp file is gone and this is a no-op;
        # after a failure it removes the incomplete dump.
        if os.path.exists(partial):
            os.remove(partial)
    return model
def train_model(dataset, N):
    """Fit a linear SVM classifier on the first ``N`` rows of ``dataset``.

    Column 0 is used as the target and every remaining column as a feature.
    Returns the fitted ``svm.LinearSVC`` instance.
    """
    training_rows = dataset.iloc[:N]
    features = training_rows.iloc[:, 1:]
    labels = training_rows.iloc[:, 0]

    classifier = svm.LinearSVC()
    classifier.fit(features, labels)
    return classifier
def test_model(model, dataset, N):
    """Evaluate ``model`` on the rows from ``N`` onward and print the result.

    The confusion matrix is printed as whole-number percentages normalised
    per row, so every row with at least one sample sums to roughly 100.

    Args:
        model: Fitted estimator exposing ``predict(X)``.
        dataset: DataFrame whose first column holds the labels and whose
            remaining columns hold the features.
        N: Index of the first row used for testing.
    """
    X_test = dataset.iloc[N:, 1:]
    y_test = dataset.iloc[N:, 0]
    y_pred = model.predict(X_test)

    # Builtin float instead of the np.float alias, which newer NumPy removed.
    counts = confusion_matrix(y_test, y_pred).astype(float)

    # keepdims=True keeps the totals as a column vector, so each row is
    # divided by its own total. A flat vector of totals would broadcast across
    # columns and divide column j by the total of row j instead.
    row_totals = counts.sum(axis=1, keepdims=True)

    # A row with no samples stays at 0 instead of becoming NaN.
    fractions = np.divide(
        counts,
        row_totals,
        out=np.zeros_like(counts),
        where=row_totals != 0,
    )

    # Single-argument call form prints identically on Python 2 and Python 3.
    print(np.around(fractions * 100.0))
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
This file has been truncated, but you can view the full file.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment