I'm continuing to work through the algorithms used in AI. As always, theory and practice are different. There's a saying I like and hear often: "Hardware doesn't lie." Here, "hardware" can also mean the computer itself.
My program doesn't work?? The hardware doesn't lie. I just made a mistake!!
Anyway, today's material breaks down into the following topics:
● Splitting the dataset (a quick sketch follows this list)
● Underfitting / Overfitting
● Skewed Classes
● Precision/Recall
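Before the lecture code, a minimal sketch of splitting a dataset into train/validation/test sets, using scikit-learn's `train_test_split`; the 60/20/20 ratio and the toy arrays are my own choices for illustration, not from the lecture.

```python
import numpy as np
from sklearn.model_selection import train_test_split

X = np.random.rand(100, 3)  # toy features
y = np.random.rand(100)     # toy targets

# Carve out 20% as the test set first...
X_rest, X_test, y_rest, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# ...then split the remainder 75/25, giving 60/20/20 overall.
X_train, X_val, y_train, y_val = train_test_split(X_rest, y_rest, test_size=0.25, random_state=0)

print(len(X_train), len(X_val), len(X_test))  # 60 20 20
```

The training set fits the model, the validation set tunes its complexity, and the test set is only touched once at the end.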
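The first exercise trains plain linear regression on the Boston Housing dataset with TensorFlow's 1.x graph API, holding out 30% of the data as a test set so that the train and test costs can be compared: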
```python
import numpy as np
import tensorflow as tf
from tensorflow import keras

def get_data():
    # Boston Housing dataset, split 70/30 into train and test sets.
    boston_housing = keras.datasets.boston_housing
    (train_data, train_labels), (test_data, test_labels) = \
        boston_housing.load_data(test_split=0.3)
    # Labels come back as 1-D arrays; add a column axis so they match y_output.
    return (train_data, np.expand_dims(train_labels, axis=1),
            test_data, np.expand_dims(test_labels, axis=1))

x_train, y_train, x_test, y_test = get_data()

num_inputs = x_train.shape[-1]
num_outputs = y_train.shape[-1]
print(num_inputs)
print(num_outputs)

# Linear regression: y = theta0 + x @ thetan
x_input = tf.placeholder(tf.float32, [None, num_inputs])
y_input = tf.placeholder(tf.float32, [None, num_outputs])
theta0_var = tf.Variable(np.zeros([num_outputs], dtype=np.float32))
thetan_var = tf.Variable(np.zeros([num_inputs, num_outputs], dtype=np.float32))
y_output = theta0_var + tf.matmul(x_input, thetan_var)

# Mean squared error cost, minimized with Adam.
cost_output = tf.reduce_mean((y_input - y_output) ** 2)
learning_rate = 0.001
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cost_output)

max_epoch = 10000
check_point = max_epoch // 5  # report progress five times

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(max_epoch):
        sess.run(train_step, feed_dict={x_input: x_train, y_input: y_train})
        if (i + 1) % check_point == 0:
            print('Done: {}%'.format((i + 1) / max_epoch * 100))
    # Evaluate the same cost on both splits to compare generalization.
    train_cost = sess.run(cost_output, feed_dict={x_input: x_train, y_input: y_train})
    test_cost = sess.run(cost_output, feed_dict={x_input: x_test, y_input: y_test})
    print('train cost: {:.2f}, test cost: {:.2f}'.format(train_cost, test_cost))
```
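A train cost far below the test cost means the model has memorized the training split instead of learning the underlying relationship (overfitting); both costs high and close together means the model is too simple (underfitting). The next experiment makes that trade-off visible.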
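The second experiment fits polynomial regressions of increasing degree to noisy samples of cos(1.5πx), then plots the train and validation MSE against the degree of the polynomial: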
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

def true_fun(X):
    return np.cos(1.5 * np.pi * X)

def generate_data(size):
    # Noisy samples of the true function.
    X = np.sort(np.random.rand(size))
    y = true_fun(X) + np.random.randn(size) * 0.1
    return X[:, np.newaxis], y

def build_model(degree):
    # Polynomial regression = polynomial feature expansion + linear regression.
    polynomial_features = PolynomialFeatures(degree=degree, include_bias=False)
    linear_regression = LinearRegression()
    return Pipeline([("polynomial_features", polynomial_features),
                     ("linear_regression", linear_regression)])

def get_mse(model, X, y):
    y_pred = model.predict(X)
    return mean_squared_error(y, y_pred)

def plot_model(model, X_train, y_train, degree, train_mse):
    X = np.linspace(0, 1, 100)
    plt.plot(X, model.predict(X[:, np.newaxis]), label="Model")
    plt.plot(X, true_fun(X), label="True function")
    plt.scatter(X_train, y_train, edgecolor='b', s=20, label="Samples")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.xlim((0, 1))
    plt.ylim((-2, 2))
    plt.legend(loc="best")
    plt.title('Degree {}\nMSE = {:.2e}'.format(degree, train_mse))

np.random.seed(0)
data_size = 70
X_train, y_train = generate_data(int(data_size * 0.7))
X_val, y_val = generate_data(int(data_size * 0.3))

max_degree = 35
degrees = range(1, max_degree + 1)
mses = {'train': [], 'val': []}
plot_degrees = [1, 4, max_degree]  # degrees to visualize

plt.close('all')
plt.figure(figsize=(14, 5))
for d in degrees:
    model = build_model(d)
    model.fit(X_train, y_train)
    mses['train'].append(get_mse(model, X_train, y_train))
    mses['val'].append(get_mse(model, X_val, y_val))
    if d in plot_degrees:
        ax = plt.subplot(1, len(plot_degrees), plot_degrees.index(d) + 1)
        plt.setp(ax, xticks=(), yticks=())
        plot_model(model, X_train, y_train, d, mses['train'][-1])
plt.show()

# Train vs. validation error as model complexity grows.
plt.close('all')
plt.plot(degrees, mses['train'])
plt.plot(degrees, mses['val'])
plt.xlabel('model complexity (degree of polynomial)')
plt.ylabel('mean squared error')
plt.legend(['train', 'validation'], loc='upper center')
plt.show()
```
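Degree 1 should underfit (a straight line through a cosine), degree 4 should track the true function closely, and degree 35 should chase the noise in individual samples. In the final plot, the train MSE keeps shrinking as the degree grows, while the validation MSE bottoms out and then climbs; the degree at that validation minimum is the one worth keeping.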
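Last topic: skewed classes. When one label heavily outnumbers the other, accuracy says little on its own, so the code below sweeps every probability threshold with `precision_recall_curve`, where precision = TP/(TP+FP) and recall = TP/(TP+FN), and then picks the threshold that maximizes F1, the harmonic mean of the two: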
```python
import numpy as np
import pandas as pd
import urllib.request
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_curve
import matplotlib.pyplot as plt

def lr_train_and_predict(X, y):
    m = LogisticRegression().fit(X, y)
    return m, m.predict(X)

def draw_precision_recall_graph(precision, recall):
    plt.close('all')
    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    step_kwargs = {'step': 'post'}
    plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.show()

# Download the data
url = 'https://raw.githubusercontent.com/salopge/datasets/master/skewed_data.csv'
urllib.request.urlretrieve(url, './skewed_data.csv')

# The label marks one of two states: level vs. tilted
data = pd.read_csv('skewed_data.csv', names=['var1', 'var2', 'var3', 'label'])

# Train on the data and predict
X_org = data.loc[:, 'var1':'var3']
y_org = data.label
model_org, pred_y_org = lr_train_and_predict(X_org, y_org)

# Predicted probability of the positive class, used to sweep thresholds.
y_prob_org = model_org.predict_proba(X_org)[:, 1]
print(y_prob_org[:5])

precision, recall, threshold = precision_recall_curve(y_org, y_prob_org)
print('pre', precision[:5])
print('reca', recall[:5])
print(threshold[:5])
draw_precision_recall_graph(precision, recall)

# precision/recall have one more entry than threshold (the final (1, 0) point),
# so drop the last entry before matching F1 scores to thresholds.
f1_score = 2 * precision[:-1] * recall[:-1] / (precision[:-1] + recall[:-1])
print(f1_score[:5])

# Threshold that maximizes F1.
best_threshold = threshold[np.argmax(f1_score)]
print(best_threshold)
```
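Note that `precision_recall_curve` returns one more precision/recall entry than thresholds, which is why the code trims the last point before taking the argmax. As for why accuracy is the wrong lens here in the first place, a toy sketch (my own example, not from the lecture; `zero_division=0` needs scikit-learn ≥ 0.22):

```python
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score

y_true = np.array([0] * 95 + [1] * 5)  # 95:5 skew
y_pred = np.zeros(100, dtype=int)      # always predict the majority class

print(accuracy_score(y_true, y_pred))                    # 0.95 -- looks great
print(precision_score(y_true, y_pred, zero_division=0))  # 0.0
print(recall_score(y_true, y_pred))                      # 0.0 -- misses every positive
```

95% accuracy with zero recall: the classifier never detects a single positive case.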