You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

154 lines
4.4 KiB

5 years ago
  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. import seaborn as sns
  5. import warnings
  6. import os
  7. import tensorflow as tf
  8. import FinanceDataReader as fdr
  9. #from Kiwoom import *
  # Suppress every warning raised from here on.
  warnings.filterwarnings('ignore')
  # Select the NanumGothic font family for all matplotlib text
  # (assumes the font is installed on this machine -- TODO confirm).
  plt.rcParams['font.family'] = 'NanumGothic'
  # Samsung Electronics (005930), full history (1996-11-05 ~ present)
  # Devsisters (194480)
  # GS Retail (007070)
  # Semisysco (136510)
  # Data from a given point in time onward:
  #apple = fdr.DataReader('AAPL', '2017')
  # Ticker code of the stock to download; swap in one of the codes listed above.
  STOCK_CODE = '005930'
  # Download the price history for STOCK_CODE (no start date is passed).
  stock = fdr.DataReader(STOCK_CODE)
  # The bare expressions below only display output in an interactive session
  # (notebook/REPL); they have no effect when this file runs as a script.
  stock.head()
  stock.tail()
  stock.index
  # Split the index into calendar parts
  # (presumably a DatetimeIndex, since .year/.month/.day are read -- verify).
  stock['Year'] = stock.index.year
  stock['Month'] = stock.index.month
  stock['Day'] = stock.index.day
  stock.head()
  27. #plt.figure(figsize=(16, 9))
  28. #sns.lineplot(y=stock['Close'], x=stock.index)
  29. #plt.xlabel('time')
  30. #plt.ylabel('price')
  31. #time_steps = [['1990', '2000'],
  32. # ['2000', '2010'],
  33. # ['2010', '2015'],
  34. # ['2015', '2021']]
  35. #fig, axes = plt.subplots(2, 2)
  36. ##fig.set_size_inches(16, 9)
  37. #for i in range(4):
  38. # ax = axes[i//2, i%2]
  39. # df = stock.loc[(stock.index > time_steps[i][0]) & (stock.index < time_steps[i][1])]
  40. #sns.lineplot(y=df['Close'], x=df.index, ax=ax)
  41. #ax.set_title(f'{time_steps[i][0]}~{time_steps[i][1]}')
  42. #ax.set_xlabel('time')
  43. #ax.set_ylabel('price')
  44. #plt.tight_layout()
  45. #plt.show()
  from sklearn.preprocessing import MinMaxScaler

  scaler = MinMaxScaler()
  # Columns to rescale.
  scale_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
  # Scaled values, returned as a plain array in scale_cols order.
  # NOTE(review): the scaler is fitted on the full history, so statistics from
  # the later test period also shape the training values -- confirm this
  # is acceptable for the experiment.
  scaled = scaler.fit_transform(stock[scale_cols])
  scaled
  # Rebuild a DataFrame from the array; only the column names are passed,
  # so the original index of `stock` is not carried over.
  df = pd.DataFrame(scaled, columns=scale_cols)

  from sklearn.model_selection import train_test_split

  # Chronological 80/20 split (shuffle=False keeps the time order intact).
  # `columns=` replaces the positional axis argument of the old
  # df.drop('Close', 1) call, which pandas deprecated in 1.3 and removed in 2.0.
  x_train, x_test, y_train, y_test = train_test_split(
      df.drop(columns='Close'),
      df['Close'],
      test_size=0.2,
      random_state=0,
      shuffle=False,
  )
  # Notebook-style inspection; no effect when run as a script.
  x_train.shape, y_train.shape
  x_test.shape, y_test.shape
  x_train
  59. #데이터셋구성
  def windowed_dataset(series, window_size, batch_size, shuffle):
      """Turn a sequence into batched (input window, next value) pairs.

      Args:
          series: Values to slide over; a trailing axis of size 1 is appended
              before slicing.
          window_size: Number of consecutive steps in each input window.
          batch_size: Number of (window, target) pairs per emitted batch.
          shuffle: When truthy, shuffle the windows with a buffer of 1000
              before splitting them into inputs and targets.

      Returns:
          A ``tf.data.Dataset`` yielding ``(x, y)`` batches where ``x`` holds
          the first ``window_size`` steps of each window and ``y`` the step
          that immediately follows them.
      """
      # Each window carries one extra step: the value to be predicted.
      span = window_size + 1
      column = tf.expand_dims(series, axis=-1)
      windows = (
          tf.data.Dataset.from_tensor_slices(column)
          .window(span, shift=1, drop_remainder=True)
          # window() yields nested datasets; batch(span) flattens each to a tensor.
          .flat_map(lambda chunk: chunk.batch(span))
      )
      if shuffle:
          windows = windows.shuffle(1000)
      pairs = windows.map(lambda chunk: (chunk[:-1], chunk[-1]))
      return pairs.batch(batch_size).prefetch(1)
  # Number of past steps fed to the model, and number of windows per batch.
  WINDOW_SIZE = 20
  BATCH_SIZE = 32
  # Only the scaled 'Close' series (y_train / y_test) is windowed; the training
  # windows are shuffled, the test windows keep their order.
  train_data = windowed_dataset(y_train, WINDOW_SIZE, BATCH_SIZE, True)
  test_data = windowed_dataset(y_test, WINDOW_SIZE, BATCH_SIZE, False)
  # Peek at one batch to check the dataset layout.
  # X: (batch_size, window_size, feature)
  # Y: (batch_size, feature)
  for data in train_data.take(1):
      x_batch, y_batch = data
      print(f'데이터셋(X) 구성(batch_size, window_size, feature갯수): {x_batch.shape}')
      # The Y label previously repeated the X layout; targets have no
      # window_size axis, so the label now matches the printed shape.
      print(f'데이터셋(Y) 구성(batch_size, feature갯수): {y_batch.shape}')
  79. #모델
  80. from tensorflow.keras.models import Sequential
  81. from tensorflow.keras.layers import Dense, LSTM, Conv1D, Lambda
  82. from tensorflow.keras.losses import Huber
  83. from tensorflow.keras.optimizers import Adam
  84. from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
  # Conv1D front end -> LSTM -> small dense head producing one value per window.
  model = Sequential()
  # Build a 1-D feature map over each (WINDOW_SIZE, 1) input window.
  model.add(
      Conv1D(
          filters=32,
          kernel_size=5,
          padding="causal",
          activation="relu",
          input_shape=[WINDOW_SIZE, 1],
      )
  )
  # Recurrent layer over the convolved sequence.
  model.add(LSTM(16, activation='tanh'))
  model.add(Dense(16, activation="relu"))
  # Single output unit: the predicted next value.
  model.add(Dense(1))
  # Huber loss is used because it tends to perform comparatively well for
  # sequence learning.
  loss = Huber()
  optimizer = Adam(0.0005)
  # Pass the `loss` object created above; it used to be left unused while
  # compile() built a second Huber().
  model.compile(loss=loss, optimizer=optimizer, metrics=['mse'])
  # Stop training when val_loss has not improved for 10 epochs.
  earlystopping = EarlyStopping(monitor='val_loss', patience=10)
  # Also keep a checkpoint of the weights with the best val_loss so far.
  filename = os.path.join('tmp', 'ckeckpointer.ckpt')
  checkpoint = ModelCheckpoint(
      filename,
      save_weights_only=True,
      save_best_only=True,
      monitor='val_loss',
      verbose=1,
  )
  # NOTE(review): the test windows double as validation data, so early stopping
  # and checkpoint selection are tuned on the same data later used for the
  # final plot -- confirm this is intended.
  history = model.fit(
      train_data,
      validation_data=test_data,
      epochs=50,
      callbacks=[checkpoint, earlystopping],
  )
  # Restore the weights saved by the checkpoint callback (best val_loss).
  model.load_weights(filename)
  pred = model.predict(test_data)
  # Notebook-style inspection; no effect when run as a script.
  pred.shape
  plt.figure(figsize=(12, 9))
  # Each prediction is made from the WINDOW_SIZE values before it, so the first
  # predicted point lines up with y_test[WINDOW_SIZE]. Using the constant
  # instead of a literal 20 keeps the curves aligned if the window changes.
  plt.plot(np.asarray(y_test)[WINDOW_SIZE:], label='actual')
  plt.plot(pred, label='prediction')
  plt.grid()
  plt.legend(loc='best')
  #plt.tight_layout()
  plt.show()