본문 바로가기
Analysis

30 days Rolling Correlation violin plot

by DATAUNION 2024. 1. 26.
반응형
import pandas as pd
from sqlalchemy import create_engine
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# SQLAlchemy 엔진 설정
engine = create_engine("mysql+pymysql://root:1*****s@localhost/krxstock?charset=utf8")

# 모든 테이블 이름 가져오기
table_query = "SHOW TABLES"
tables = pd.read_sql(table_query, con=engine)
table_names = tables.iloc[:, 0].tolist()

# 날짜 설정
start_date = pd.Timestamp('2023-01-01')
end_date = pd.Timestamp('2023-03-30')

# 세 종목에 대해 실행
for stock_code in ['a017670', 'a056080', 'a005930']:
    # 특정 종목의 'close' 데이터 가져오기
    query = f"SELECT date, close FROM {stock_code}"
    df_stock = pd.read_sql(query, con=engine)
    df_stock['date'] = pd.to_datetime(df_stock['date'])
    df_stock.set_index('date', inplace=True)
    df_stock = df_stock.loc[start_date:end_date]

    # 상관계수를 저장할 리스트 생성
    correlation_data = []

    # 모든 테이블에 대한 'close' 값 상관계수 계산
    for table in table_names:
        if table == stock_code:
            continue
        query = f"SELECT date, close FROM {table}"
        df = pd.read_sql(query, con=engine)
        df['date'] = pd.to_datetime(df['date'])
        df.set_index('date', inplace=True)

        # 두 데이터프레임을 병합
        merged_df = pd.merge(df_stock, df, left_index=True, right_index=True, how='inner', suffixes=(f'_{stock_code}', f'_{table}'))

        # 30일 롤링 윈도우로 상관계수 계산
        rolling_corr = merged_df[f'close_{stock_code}'].rolling(window=30).corr(merged_df[f'close_{table}'])

        # 지정된 날짜 범위의 데이터만 필터링
        filtered_corr = rolling_corr.loc[start_date:end_date]

        # 일별로 분류하고 평균 상관계수 계산
        for date, group in filtered_corr.groupby(filtered_corr.index):
            correlation_data.append({'Date': date, 'Correlation': group.mean()})

    # 데이터 프레임으로 변환
    df_corr_by_day = pd.DataFrame(correlation_data)
    df_corr_by_day.set_index('Date', inplace=True)

    # NaN과 inf 값 제거
    df_corr_by_day = df_corr_by_day.dropna()
    df_corr_by_day = df_corr_by_day.replace([np.inf, -np.inf], np.nan).dropna()

    # CSV 파일로 저장
    csv_filename = f"{stock_code}_correlation_data.csv"
    df_corr_by_day.to_csv(csv_filename)

    # Close Price 시계열 차트
    plt.figure(figsize=(16, 8))
    plt.plot(df_stock.index, df_stock['close'], color='tab:red')
    plt.title(f'Close Price of {stock_code}')
    plt.xlabel('Date')
    plt.ylabel('Close Price')
    plt.show()

    # Violin Plot for Correlations
    plt.figure(figsize=(16, 8))
    sns.violinplot(x=df_corr_by_day.index, y=df_corr_by_day['Correlation'], inner='stick')
    plt.title(f'30-day Rolling Correlation with {stock_code}')
    plt.xlabel('Date')
    plt.ylabel('Correlation')
    plt.xticks(rotation=90)
    plt.show()

# 연결 종료
engine.dispose()

 

 

https://chat.openai.com/g/g-X1vYUg5J2-dataunion

If you have any questions, contact GPT and get insight!!

 

ChatGPT - DATAUNION

Stock Analysis

chat.openai.com

 

반응형

댓글