SQL 데이터베이스 데이터 가져오기

import pandas as pd
import numpy as np
import pymssql
from mysql import connector

# Columns pulled from the studentmath table; medu/fedu and g1-g3 are aliased
# to more descriptive names. Adjacent literals are joined by the parser, so
# the leading spaces inside each fragment are part of the SQL text.
query = (
    'SELECT studentid, school, sex, age, famsize,'
    '        medu AS mothereducation, fedu AS fatereducation,'
    '        traveltime, studytime, failures, famrel, freetime,'
    '        goout, g1 AS gradeperiod1, g2 AS gradeperiod2,'
    '        g3 AS gradeperiod3 From studentmath'
)

# Connection settings for the SQL Server instance.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a secrets store instead.
server = 'pdcc.c9sqqzd5fulv.us-west-2.rds.amazonaws.com'
user = 'pdccuser'
password = 'pdccpass'
database = 'pdcctest'

conn = pymssql.connect(server=server,
                       user=user, password=password, database=database)

# Close the connection even when the query fails, so a failed read does not
# leave the connection open.
try:
    studentmath = pd.read_sql(query, conn)
finally:
    conn.close()

# Quick look at the first rows and at the column dtypes.
studentmath.head()

studentmath.dtypes

# Connection settings for the MySQL instance; user, password and database
# are rebound here for this second connection.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a secrets store instead.
host = 'pdccmysql.c9sqqzd5fulv.us-west-2.rds.amazonaws.com'
user = 'pdccuser'
password = 'pdccpass'
database = 'pdccschema'

# The file imports the driver with `from mysql import connector`, which binds
# only the name `connector`; calling `mysql.connector.connect` would raise
# NameError because `mysql` itself is never bound.
connmysql = connector.connect(host=host,
                              database=database, user=user, password=password)

# Close the connection even when the query fails.
try:
    studentmath = pd.read_sql(query, connmysql)
finally:
    connmysql.close()

# Put the id and the three grade columns first, followed by the remaining
# attributes.
newcolorder = [
    'studentid', 'gradeperiod1', 'gradeperiod2', 'gradeperiod3',
    'school', 'sex', 'age', 'famsize',
    'mothereducation', 'fatereducation', 'traveltime',
    'studytime', 'freetime', 'failures', 'famrel', 'goout',
]

studentmath = studentmath[newcolorder]

# Count the studentid values ...
studentmath['studentid'].count()

# ... and count the distinct studentid values, to check for duplicates.
studentmath['studentid'].nunique()

# If the two numbers are equal, we can assume for now that there are no
# duplicate ids.

studentmath = studentmath.set_index('studentid')

studentmath.count()

import numpy as np

# Per-column recoding tables: numeric codes become labelled strings, and
# education code 0 becomes a missing value.
setvalues = dict(
    famrel={1: '1:very bad', 2: '2:bad', 3: '3:neutral',
            4: '4:good', 5: '5:excellent'},
    freetime={1: '1:very low', 2: '2:low', 3: '3:neutral',
              4: '4:high', 5: '5:very high'},
    goout={1: '1:very low', 2: '2:low', 3: '3:neutral',
           4: '4:high', 5: '5:very high'},
    mothereducation={0: np.nan, 1: '1:k-4', 2: '2:5-9',
                     3: '3:secondary ed', 4: '4:higher ed'},
    fatereducation={0: np.nan, 1: '1:k-4', 2: '2:5-9',
                    3: '3:secondary ed', 4: '4:higher ed'},
)

studentmath = studentmath.replace(setvalues)
setvalueskeys = list(setvalues)

# Memory used by the recoded columns before the dtype conversion.
studentmath[setvalueskeys].memory_usage(index=False)

# Convert every recoded column to the category dtype in one call.
studentmath = studentmath.astype(dict.fromkeys(setvalueskeys, 'category'))

# Memory used by the same columns after the conversion, for comparison.
studentmath[setvalueskeys].memory_usage(index=False)

# Relative frequencies (normalize=True) of each famrel label, not ordered by
# frequency (sort=False).
studentmath['famrel'].value_counts(sort=False, normalize=True)

# The same frequency table, computed column by column for several columns.
(
    studentmath[['freetime', 'goout']]
    .apply(lambda column: column.value_counts(sort=False, normalize=True))
)

(
    studentmath[['mothereducation', 'fatereducation']]
    .apply(lambda column: column.value_counts(sort=False, normalize=True))
)

'개발일지 > 임시카테고리' 카테고리의 다른 글

python 정규표현식 re로 문자열 안의 값을 꺼내기, 변경해주기 (0)	2022.07.08
Python 기본용어 원리 (strip, split, join, upper, lower, capitalize) (0)	2022.07.08
pymssql 설치에러 해결! (0)	2022.07.08
pandas excel 가져오기 루틴 (0)	2022.07.08
pandas로 데이터 가져오기 루틴 1 (csv파일 읽기) (0)	2022.07.08

다니엘의 개발 이야기

SQL 데이터베이스 데이터 가져오기

'개발일지 > 임시카테고리' 카테고리의 다른 글

티스토리툴바

SQL 데이터베이스 데이터 가져오기

'개발일지 > 임시카테고리' 카테고리의 다른 글

관련글

티스토리툴바