You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
154 lines
4.4 KiB
154 lines
4.4 KiB
import pandas as pd
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
import warnings
|
|
import os
|
|
import tensorflow as tf
|
|
import FinanceDataReader as fdr
|
|
#from Kiwoom import *
|
|
|
|
|
|
# Ignore every warning category for the rest of the run.
warnings.simplefilter('ignore')

# Select the font family matplotlib uses for text.
# NOTE(review): presumably chosen so Korean labels render; confirm that
# NanumGothic is installed on the machine running this script.
plt.rcParams.update({'font.family': 'NanumGothic'})
|
|
|
|
|
|
# Candidate tickers:
#   Samsung Electronics (005930) - full history (1996-11-05 to present)
#   Devsisters (194480)
#   GS Retail (007070)
#   Semisysco (136510)
#
# To fetch only the data after a given point in time, pass a start value:
#   apple = fdr.DataReader('AAPL', '2017')

STOCK_CODE = '005930'

# Download the price history for the selected ticker.
stock = fdr.DataReader(STOCK_CODE)

# Notebook-style inspection. These bare expressions are evaluated and
# discarded; they display nothing when the file runs as a plain script.
stock.head()
stock.tail()
stock.index

# Copy the calendar components of the index into ordinary columns.
for _part in ('Year', 'Month', 'Day'):
    stock[_part] = getattr(stock.index, _part.lower())

stock.head()
|
|
|
|
#plt.figure(figsize=(16, 9))
|
|
#sns.lineplot(y=stock['Close'], x=stock.index)
|
|
#plt.xlabel('time')
|
|
#plt.ylabel('price')
|
|
|
|
#time_steps = [['1990', '2000'],
|
|
# ['2000', '2010'],
|
|
# ['2010', '2015'],
|
|
# ['2015', '2021']]
|
|
|
|
#fig, axes = plt.subplots(2, 2)
|
|
##fig.set_size_inches(16, 9)
|
|
#for i in range(4):
|
|
# ax = axes[i//2, i%2]
|
|
# df = stock.loc[(stock.index > time_steps[i][0]) & (stock.index < time_steps[i][1])]
|
|
#sns.lineplot(y=df['Close'], x=df.index, ax=ax)
|
|
#ax.set_title(f'{time_steps[i][0]}~{time_steps[i][1]}')
|
|
#ax.set_xlabel('time')
|
|
#ax.set_ylabel('price')
|
|
#plt.tight_layout()
|
|
#plt.show()
|
|
|
|
from sklearn.preprocessing import MinMaxScaler
|
|
|
|
# Columns that the scaler is applied to.
scale_cols = ['Open', 'High', 'Low', 'Close', 'Volume']

# MinMaxScaler with its default settings.
# NOTE(review): the scaler is fitted on the whole series before the
# train/test split later in the file, so test-period statistics influence
# the scaling - confirm this is acceptable.
scaler = MinMaxScaler()

# Scaled values for the selected columns.
scaled = scaler.fit_transform(stock[scale_cols])
scaled  # notebook display leftover; no effect in a script

# Wrap the scaled values in a DataFrame that reuses the column names.
# No `index=` is passed, so the index of `stock` is not carried over.
df = pd.DataFrame(data=scaled, columns=scale_cols)
|
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
# Features are every scaled column except 'Close'; the target is 'Close'.
# `columns=` is used instead of the positional axis argument
# (`df.drop('Close', 1)`), which newer pandas releases reject.
# shuffle=False keeps the rows in their original order for the split.
x_train, x_test, y_train, y_test = train_test_split(
    df.drop(columns='Close'),
    df['Close'],
    test_size=0.2,
    random_state=0,
    shuffle=False,
)

# Notebook-style inspection; these expressions have no effect in a script.
x_train.shape, y_train.shape
x_test.shape, y_test.shape
x_train
|
|
|
|
#데이터셋구성
|
|
def windowed_dataset(series, window_size, batch_size, shuffle, shuffle_buffer=1000):
    """Build a batched tf.data pipeline of (window, next value) pairs.

    Each element pairs `window_size` consecutive values of `series` with
    the single value that immediately follows them.

    Args:
        series: Sequence of values to slice into windows. A trailing axis
            is appended with `tf.expand_dims`.
            NOTE(review): callers in this file pass a single column, so a
            1-D input is assumed - confirm before passing other shapes.
        window_size: Number of consecutive values used as model input.
        batch_size: Number of (window, target) pairs per batch.
        shuffle: When truthy, shuffle the windows before batching.
        shuffle_buffer: Buffer size handed to `Dataset.shuffle`. Defaults
            to 1000, the value that was previously hard-coded.

    Returns:
        A `tf.data.Dataset` yielding `(x, y)` batches, prefetched one
        batch ahead.
    """
    series = tf.expand_dims(series, axis=-1)
    ds = tf.data.Dataset.from_tensor_slices(series)
    # Each window holds window_size inputs plus one trailing target value;
    # drop_remainder discards the incomplete windows at the end.
    ds = ds.window(window_size + 1, shift=1, drop_remainder=True)
    # window() yields nested datasets; batch each one into a single tensor.
    ds = ds.flat_map(lambda w: w.batch(window_size + 1))
    if shuffle:
        ds = ds.shuffle(shuffle_buffer)
    # Split every window into (inputs, target).
    ds = ds.map(lambda w: (w[:-1], w[-1]))
    return ds.batch(batch_size).prefetch(1)
|
|
|
|
# Number of past values per input window, and number of windows per batch.
WINDOW_SIZE = 20
BATCH_SIZE = 32

# Training windows are shuffled.
train_data = windowed_dataset(
    series=y_train,
    window_size=WINDOW_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Test windows keep their original order.
test_data = windowed_dataset(
    series=y_test,
    window_size=WINDOW_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=False,
)
|
|
|
|
# The loop below prints the layout of one batch of the dataset.
#   X: (batch_size, window_size, feature)
#   Y: (batch_size, feature)
for data in train_data.take(1):
    x_batch, y_batch = data
    print(f'데이터셋(X) 구성(batch_size, window_size, feature갯수): {x_batch.shape}')
    # The Y label previously repeated the X layout (with window_size);
    # the targets have no window axis.
    print(f'데이터셋(Y) 구성(batch_size, feature갯수): {y_batch.shape}')
|
|
|
|
#모델
|
|
from tensorflow.keras.models import Sequential
|
|
from tensorflow.keras.layers import Dense, LSTM, Conv1D, Lambda
|
|
from tensorflow.keras.losses import Huber
|
|
from tensorflow.keras.optimizers import Adam
|
|
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
|
|
|
|
|
|
|
|
# Assemble the network layer by layer.
model = Sequential()

# Build a 1-D feature map over each input window of shape (WINDOW_SIZE, 1).
model.add(
    Conv1D(
        filters=32,
        kernel_size=5,
        padding="causal",
        activation="relu",
        input_shape=[WINDOW_SIZE, 1],
    )
)

# LSTM layer over the convolved sequence.
model.add(LSTM(16, activation='tanh'))

# Dense head ending in a single output value per window.
model.add(Dense(16, activation="relu"))
model.add(Dense(1))
|
|
|
|
# Use the Huber loss, which performs comparatively well for sequence learning.
loss = Huber()

# Adam optimizer; the positional argument is the learning rate.
optimizer = Adam(0.0005)

# Pass the `loss` object created above instead of constructing a second,
# identical Huber() and leaving `loss` unused.
model.compile(loss=loss, optimizer=optimizer, metrics=['mse'])
|
|
|
|
# Stop training when val_loss has not improved for 10 epochs.
earlystopping = EarlyStopping(patience=10, monitor='val_loss')

# Also create a checkpointer keyed on val_loss: only the weights are
# written, and only when val_loss improves.
# NOTE(review): newer Keras releases may require a '.weights.h5' suffix when
# save_weights_only=True - confirm against the installed TensorFlow version.
filename = os.path.join('tmp', 'ckeckpointer.ckpt')
checkpoint = ModelCheckpoint(
    filepath=filename,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)
|
|
|
|
# Train for up to 50 epochs, validating on the test windows; the callbacks
# save checkpoints and may stop training early.
history = model.fit(
    x=train_data,
    epochs=50,
    validation_data=test_data,
    callbacks=[checkpoint, earlystopping],
)

# Reload the weights written by the checkpoint callback, then predict on
# the test windows.
model.load_weights(filename)
pred = model.predict(test_data)
pred.shape  # notebook display leftover; no effect in a script
|
|
|
|
|
|
plt.figure(figsize=(12, 9))

# Every prediction corresponds to the value that follows a full input
# window, so the first WINDOW_SIZE actual values have no matching
# prediction. Use the constant rather than a literal 20 so the two curves
# stay aligned if the window size changes.
plt.plot(np.asarray(y_test)[WINDOW_SIZE:], label='actual')
plt.plot(pred, label='prediction')

plt.grid()
plt.legend(loc='best')
#plt.tight_layout()
plt.show()
|