320x100
전체적으로 시험을 염두하고 강의한 섹션이였는데,
처음엔 모르다가도, 그래도 연결고리를 들으니 완전 마지막이나, 부분적으로 왜 그렇게 하는지
이해가 안간 것도 몇가지 있었으나 그 외의것은 아주 짜임새 있고 쓸모가 많다는
유용한 섹션이라는 생각이 들었다.
* Step은 중간에 안쓴것도 있다. 그냥 없는대로 흘려서 보자.
# Some Guidance and Hints
## Step 1: Getting Started
import pandas as pd
import numpy as np
summer = pd.read_csv('summer.csv')
wik_1996 = pd.read_csv('wik_1996.csv')
wik_1976 = pd.read_csv('wik_1976.csv')
summer.head()
'''
Year City Sport Discipline Athlete Country Gender Event Medal
0 1896 Athens Aquatics Swimming HAJOS, Alfred HUN Men 100M Freestyle Gold
1 1896 Athens Aquatics Swimming HERSCHMANN, Otto AUT Men 100M Freestyle Silver
2 1896 Athens Aquatics Swimming DRIVAS, Dimitrios GRE Men 100M Freestyle For Sailors Bronze
3 1896 Athens Aquatics Swimming MALOKINIS, Ioannis GRE Men 100M Freestyle For Sailors Gold
4 1896 Athens Aquatics Swimming CHASAPIS, Spiridon GRE Men 100M Freestyle For Sailors Silver
'''
summer.tail()
'''
Year City Sport Discipline Athlete Country Gender Event Medal
31160 2012 London Wrestling Wrestling Freestyle JANIKOWSKI, Damian POL Men Wg 84 KG Bronze
31161 2012 London Wrestling Wrestling Freestyle REZAEI, Ghasem Gholamreza IRI Men Wg 96 KG Gold
31162 2012 London Wrestling Wrestling Freestyle TOTROV, Rustam RUS Men Wg 96 KG Silver
31163 2012 London Wrestling Wrestling Freestyle ALEKSANYAN, Artur ARM Men Wg 96 KG Bronze
31164 2012 London Wrestling Wrestling Freestyle LIDBERG, Jimmy SWE Men Wg 96 KG Bronze
'''
# 컬럼을 기준으로 결측값이 있는 행을 리턴
# 결측치 리턴
# 결측값 리턴
# Nan 리턴
summer.isna().any(axis = 1)
'''
0 False
1 False
2 False
3 False
4 False
...
31160 False
31161 False
31162 False
31163 False
31164 False
Length: 31165, dtype: bool
'''
summer.loc[summer.isna().any(axis = 1)]
'''
Year City Sport Discipline Athlete Country Gender Event Medal
29603 2012 London Athletics Athletics Pending NaN Women 1500M Gold
31072 2012 London Weightlifting Weightlifting Pending NaN Women 63KG Gold
31091 2012 London Weightlifting Weightlifting Pending NaN Men 94KG Silver
31110 2012 London Wrestling Wrestling Freestyle KUDUKHOV, Besik NaN Men Wf 60 KG Silver
'''
summer.dropna(inplace = True)
wik_1976.head()
'''
Rank NOC Gold Silver Bronze Total
0 1 Soviet Union (URS) 49 41 35 125
1 2 East Germany (GDR) 40 25 25 90
2 3 United States (USA) 34 35 25 94
3 4 West Germany (FRG) 10 12 17 39
4 5 Japan (JPN) 9 6 10 25
'''
wik_1996.head()
'''
Rank Nation Gold Silver Bronze Total
0 1 United States (USA)* 44 32 25 101
1 2 Russia (RUS) 26 21 16 63
2 3 Germany (GER) 20 18 27 65
3 4 China (CHN) 16 22 12 50
4 5 France (FRA) 15 7 15 37
'''
wik_1976.NOC.str.split('(', expand = True).iloc[:, 1].str.replace(')', '').str.replace('*','')
# 1. wik_1976 변수에 들어있는
# 2. NOC 컬럼 안의 값 중에서
# 3. 문자열 값이 있는데 (str)
# 4. 그 중에서 ( 라는 녀석을 찾아서
# 5. 없애주고, 그 없앤 ( 를 기준해서 양옆으로 각 한개씩으로 나눠라.
# 6. 나눈 후에는 DataFrame화 해줘라. ( expand = True)
wik_1976.NOC.str.split('(', expand = True)
# (를 기준으로 분할이 된 것 중에서 전체의 행, 그 중에서 1번 컬럼 인덱스를 가진 것을 리턴해줘
wik_1976.NOC.str.split('(', expand = True).iloc[:, 1]
# 그리고 다시 ) 라는 문자를 찾아서 아무것도 없는 것으로 변환해줘
wik_1976.NOC.str.split('(', expand = True).iloc[:,1].str.replace(')', '')
wik_1976.head()
'''
Rank NOC Gold Silver Bronze Total
0 1 Soviet Union (URS) 49 41 35 125
1 2 East Germany (GDR) 40 25 25 90
2 3 United States (USA) 34 35 25 94
3 4 West Germany (FRG) 10 12 17 39
4 5 Japan (JPN) 9 6 10 25
'''
# 중간에 CAN* 라는게 껴있었어서 깔끔하게 해주기 위해 마지막 작업을 해주었다.
# 데이터 클리닝을 위해서 매우 중요한 루틴이다.
wik_1976.NOC.str.split('(', expand = True).iloc[:,1].str.replace(')', '').str.replace('*','')
wik_1976.head()
'''
Rank NOC Gold Silver Bronze Total
0 1 Soviet Union (URS) 49 41 35 125
1 2 East Germany (GDR) 40 25 25 90
2 3 United States (USA) 34 35 25 94
3 4 West Germany (FRG) 10 12 17 39
4 5 Japan (JPN) 9 6 10 25
'''
wik_1976['Country'] = wik_1976.NOC.str.split('(', expand = True).iloc[:,1].str.replace(')', '').str.replace('*',"")
# Country, Gold, silver, bronze를 제외한 모든 컬럼 날려버리고
# Country를 기준 컬럼으로 잡는다.
# inplace = True같이 저장되는 형태의 변동사항이 아니기 때문에
# 변수에 지정해주는 식으로 이렇게 해주기
wik_1976 = wik_1976.drop(columns = ['Rank', 'NOC', 'Total']).set_index('Country')
wik_1976
'''
Gold Silver Bronze
Country
URS 49 41 35
GDR 40 25 25
USA 34 35 25
FRG 10 12 17
JPN 9 6 10
POL 7 6 13
BUL 6 9 7
CUB 6 4 3
ROU 4 9 14
HUN 4 5 13
FIN 4 2 0
SWE 4 1 0
GBR 3 5 5
ITA 2 7 4
FRA 2 3 4
YUG 2 3 3
TCH 2 2 4
NZL 2 1 1
KOR 1 1 4
SUI 1 1 2
JAM 1 1 0
PRK 1 1 0
NOR 1 1 0
DEN 1 0 2
MEX 1 0 1
TRI 1 0 0
CAN 0 5 6
BEL 0 3 3
NED 0 2 3
POR 0 2 0
ESP 0 2 0
AUS 0 1 4
IRI 0 1 1
MGL 0 1 0
VEN 0 1 0
BRA 0 0 2
AUT 0 0 1
BER 0 0 1
PAK 0 0 1
PUR 0 0 1
THA 0 0 1
41 NOCs 198 199 216
'''
wik_1996.Nation.str.split('(', expand = True)
'''
0 1
0 United States USA)*
1 Russia RUS)
2 Germany GER)
3 China CHN)
4 France FRA)
... ... ...
75 Mozambique MOZ)
76 Puerto Rico PUR)
77 Tunisia TUN)
78 Uganda UGA)
79 Totals 79 nations)
80 rows × 2 columns
'''
wik_1996['Country'] = wik_1996.Nation.str.split('(', expand = True).iloc[:, 1].str.replace(')','').str.replace('*','')
wik_1996.head(1)
'''
Rank Nation Gold Silver Bronze Total Country
0 1 United States (USA)* 44 32 25 101 USA
'''
wik_1996 = wik_1996.drop(columns = ['Rank', 'Nation','Total']).set_index('Country')
wik_1996
'''
Gold Silver Bronze
Country
USA 44 32 25
RUS 26 21 16
GER 20 18 27
CHN 16 22 12
FRA 15 7 15
... ... ... ...
MOZ 0 0 1
PUR 0 0 1
TUN 0 0 1
UGA 0 0 1
79 nations 271 273 298
80 rows × 3 columns
'''
## Step 2: Craeting the Columns Event_Gender
summer['Event_Gender'] = summer['Gender']
summer.head()
'''
Year City Sport Discipline Athlete Country Gender Event Medal Event_Gender
0 1896 Athens Aquatics Swimming HAJOS, Alfred HUN Men 100M Freestyle Gold Men
1 1896 Athens Aquatics Swimming HERSCHMANN, Otto AUT Men 100M Freestyle Silver Men
2 1896 Athens Aquatics Swimming DRIVAS, Dimitrios GRE Men 100M Freestyle For Sailors Bronze Men
3 1896 Athens Aquatics Swimming MALOKINIS, Ioannis GRE Men 100M Freestyle For Sailors Gold Men
4 1896 Athens Aquatics Swimming CHASAPIS, Spiridon GRE Men 100M Freestyle For Sailors Silver Men
'''
# 문제 1. The Event Column contains the string 'mixed'
# 매우 중요한 루틴
# 시작부터 놀랐다.
# 대문자가 섞여있기 때문에, 그 안에서
summer.Event.str.lower()
'''
0 100m freestyle
1 100m freestyle
2 100m freestyle for sailors
3 100m freestyle for sailors
4 100m freestyle for sailors
...
31160 wg 84 kg
31161 wg 96 kg
31162 wg 96 kg
31163 wg 96 kg
31164 wg 96 kg
Name: Event, Length: 31161, dtype: object
'''
# 지정한 컬럼 안에서 mixed라는 단어 찾기
summer.Event.str.lower().str.contains('mixed')
summer.Event.str.lower().str.contains('mixed').sum()
# 38
# scala로 sum 처리를 한다.
# 문제2. The Event Column contains the string 'pairs'
summer.Event.str.lower().str.contains('pairs').sum()
# 12
# 문제3. All 'Equestrian' Events have been mixed Events.
# Equestrian 이라는 게 Sport 컬럼안에 있는지 확인
(summer.Sport == 'Equestrian').sum()
# 939
# 뭐가 있긴 있다는 건데..
summer.Sport.str.lower()
'''
0 aquatics
1 aquatics
2 aquatics
3 aquatics
4 aquatics
...
31160 wrestling
31161 wrestling
31162 wrestling
31163 wrestling
31164 wrestling
Name: Sport, Length: 31161, dtype: object
'''
summer.Sport.str.lower().str.contains('equestrian').sum()
# 939
# 계속 느끼지만 문제가 진짜 이상하다.
# 문제4. All 'Sailing' Events before 1988 have been mixed Events.
(summer.Sport == 'Sailing').sum()
# 1109
summer.Sport.str.lower().str.contains('sailing').sum()
# 1109
(summer.Sport.str.lower().str.contains('sailing') & summer.Year < 1988).sum()
# 31161
# 음... 이건 연산이 잘못작동되도록 만들어진 프로그램 같다.
# 전체의 것에 대하여 1988이전일리가..
# 결과값으로 나오는 수가 자꾸 이상한 것 같아서
# 1988년 이후의 연도도 있는지 확인해줬다.
summer.sort_values(by = 'Year', ascending= False)
(summer.Year < 1988).sum()
# 18051
# 먹긴 먹는거겠지?
# SQL처럼 AND라는 명령어는 Value Error를 초래하고
# &을 사용해줌으로 해결이 되었다.
((summer.Sport.str.lower().str.contains('sailing')) & (summer.Year < 1988)).sum()
# 755
mask1 = summer.Event.str.lower().str.contains('mixed')
mask2 = summer.Event.str.lower().str.contains('pairs')
mask3 = summer.Event.str.lower().str.contains('equestrain')
mask4 = ((summer.Sport.str.lower().str.contains('sailing')) & (summer.Year < 1988))
# | 는 and였는지 or 였는지 둘중 하나일 것이다.
summer.loc[mask1 | mask2 | mask3 | mask4, 'Event_Gender'] = 'X'
summer.Event_Gender.value_counts()
'''
Men 21968
Women 8388
X 805
Name: Event_Gender, dtype: int64
'''
# 문제5. The following medals (index labels) were awarded in Badminton mixed Double Events
summer['Event_Medals'] = summer.groupby(['Year', 'Sport', 'Discipline', 'Event', 'Event_Gender']).Medal.transform('count')
summer.Event_Medals.value_counts().sort_index()
'''
1 35
2 104
3 9216
4 1684
5 90
...
73 73
74 74
76 76
82 82
116 116
Name: Event_Medals, Length: 66, dtype: int64
'''
summer.loc[summer.Event_Medals == 5]
'''
Year City Sport Discipline Athlete Country Gender Event Medal Event_Gender Event_Medals
1273 1908 London Athletics Athletics ARCHIBALD, Edward Blake CAN Men Pole Vault Bronze Men 5
1274 1908 London Athletics Athletics JACOBS, Charles Sherman USA Men Pole Vault Bronze Men 5
1275 1908 London Athletics Athletics SÖDERSTRÖM, Bruno SWE Men Pole Vault Bronze Men 5
1276 1908 London Athletics Athletics COOKE, Edward Tiffin USA Men Pole Vault Gold Men 5
1277 1908 London Athletics Athletics GILBERT, Alfred Carleten USA Men Pole Vault Gold Men 5
... ... ... ... ... ... ... ... ... ... ... ...
29728 2012 London Athletics Athletics UKHOV, Ivan RUS Men High Jump Gold Men 5
29729 2012 London Athletics Athletics KYNARD, Erik USA Men High Jump Silver Men 5
29730 2012 London Athletics Athletics BARSHIM, Mutaz Essa QAT Men High Jump Bronze Men 5
29731 2012 London Athletics Athletics DROUIN, Derek CAN Men High Jump Bronze Men 5
29732 2012 London Athletics Athletics GRABARZ, Robert GBR Men High Jump Bronze Men 5
90 rows × 11 columns
'''
# 인덱스 정렬
summer.Event_Medals.value_counts().sort_index()
'''
1 35
2 104
3 9216
4 1684
5 90
...
73 73
74 74
76 76
82 82
116 116
Name: Event_Medals, Length: 66, dtype: int64
'''
summer.loc[summer.Event_Medals == 5]
'''
Year City Sport Discipline Athlete Country Gender Event Medal Event_Gender Event_Medals
1273 1908 London Athletics Athletics ARCHIBALD, Edward Blake CAN Men Pole Vault Bronze Men 5
1274 1908 London Athletics Athletics JACOBS, Charles Sherman USA Men Pole Vault Bronze Men 5
1275 1908 London Athletics Athletics SÖDERSTRÖM, Bruno SWE Men Pole Vault Bronze Men 5
1276 1908 London Athletics Athletics COOKE, Edward Tiffin USA Men Pole Vault Gold Men 5
1277 1908 London Athletics Athletics GILBERT, Alfred Carleten USA Men Pole Vault Gold Men 5
... ... ... ... ... ... ... ... ... ... ... ...
29728 2012 London Athletics Athletics UKHOV, Ivan RUS Men High Jump Gold Men 5
29729 2012 London Athletics Athletics KYNARD, Erik USA Men High Jump Silver Men 5
29730 2012 London Athletics Athletics BARSHIM, Mutaz Essa QAT Men High Jump Bronze Men 5
29731 2012 London Athletics Athletics DROUIN, Derek CAN Men High Jump Bronze Men 5
29732 2012 London Athletics Athletics GRABARZ, Robert GBR Men High Jump Bronze Men 5
90 rows × 11 columns
'''
summer['Team'] = pd.Series(np.where(summer.Event_Medals > 5, 'Yes', 'No'))
summer.head()
'''
Year City Sport Discipline Athlete Country Gender Event Medal Event_Gender Event_Medals Team
0 1896 Athens Aquatics Swimming HAJOS, Alfred HUN Men 100M Freestyle Gold Men 2 No
1 1896 Athens Aquatics Swimming HERSCHMANN, Otto AUT Men 100M Freestyle Silver Men 2 No
2 1896 Athens Aquatics Swimming DRIVAS, Dimitrios GRE Men 100M Freestyle For Sailors Bronze Men 3 No
3 1896 Athens Aquatics Swimming MALOKINIS, Ioannis GRE Men 100M Freestyle For Sailors Gold Men 3 No
4 1896 Athens Aquatics Swimming CHASAPIS, Spiridon GRE Men 100M Freestyle For Sailors Silver Men 3 No
'''
summer.Team.value_counts()
'''
summer.Team.value_counts()
1
summer.Team.value_counts()
Yes 20032
No 11125
Name: Team, dtype: int64
'''
## Step 5: Removing Duplicated Medals in Team Events
## The subset for determining Duplicates shall be formed by the Columns 'Year', 'Sport', 'Discipline', 'Country',
## 'Event', 'Event_Gender', 'Medal', Kepp one Medal!!
summer.reset_index(inplace= True)
summer.head()
'''
index Year City Sport Discipline Athlete Country Gender Event Medal Event_Gender Event_Medals Team
0 0 1896 Athens Aquatics Swimming HAJOS, Alfred HUN Men 100M Freestyle Gold Men 2 No
1 1 1896 Athens Aquatics Swimming HERSCHMANN, Otto AUT Men 100M Freestyle Silver Men 2 No
2 2 1896 Athens Aquatics Swimming DRIVAS, Dimitrios GRE Men 100M Freestyle For Sailors Bronze Men 3 No
3 3 1896 Athens Aquatics Swimming MALOKINIS, Ioannis GRE Men 100M Freestyle For Sailors Gold Men 3 No
4 4 1896 Athens Aquatics Swimming CHASAPIS, Spiridon GRE Men 100M Freestyle For Sailors Silver Men 3 No
'''
singles = summer.loc[summer.Team == 'No'].copy()
singles.shape
# (11125, 13)
team = summer.loc[summer.Team == 'Yes'].copy()
team.shape
# (20032, 13)
# 여기가 좀 독특하다.
# 왜 drop_duplicates와
# subset으로 써줬을지 모르겠다.
# 공부하자.
team.drop_duplicates(subset = ['Year', 'Sport', 'Discipline', 'Country', 'Event', 'Event_Gender', 'Medal'], inplace = True)
# drop_duplicates는 중복값을 제거하기 위한 함수라고 하며
# subset은 중복값으로 확인하고 제거하기 위한 값을 받기위한 매개변수라고한다.
team.shape
# (3710, 13)
pd.concat([singles, team]).shape
# (14835, 13)
summer_new = pd.concat([singles, team])
summer_new.set_index('index', inplace = True)
summer.head()
'''
index Year City Sport Discipline Athlete Country Gender Event Medal Event_Gender Event_Medals Team
0 0 1896 Athens Aquatics Swimming HAJOS, Alfred HUN Men 100M Freestyle Gold Men 2 No
1 1 1896 Athens Aquatics Swimming HERSCHMANN, Otto AUT Men 100M Freestyle Silver Men 2 No
2 2 1896 Athens Aquatics Swimming DRIVAS, Dimitrios GRE Men 100M Freestyle For Sailors Bronze Men 3 No
3 3 1896 Athens Aquatics Swimming MALOKINIS, Ioannis GRE Men 100M Freestyle For Sailors Gold Men 3 No
4 4 1896 Athens Aquatics Swimming CHASAPIS, Spiridon GRE Men 100M Freestyle For Sailors Silver Men 3 No
'''
summer_new.info()
'''
<class 'pandas.core.frame.DataFrame'>
Int64Index: 14835 entries, 0 to 31036
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Year 14835 non-null int64
1 City 14835 non-null object
2 Sport 14835 non-null object
3 Discipline 14835 non-null object
4 Athlete 14835 non-null object
5 Country 14835 non-null object
6 Gender 14835 non-null object
7 Event 14835 non-null object
8 Medal 14835 non-null object
9 Event_Gender 14835 non-null object
10 Event_Medals 14835 non-null int64
11 Team 14835 non-null object
dtypes: int64(2), object(10)
memory usage: 1.5+ MB
'''
## Step 6: Creating the official Medal Table for all Editions
# 이 부분은 전체적으로 이해가 안간다.
# 중간의 count() 까지는 그런대로 따라간다고 볼 수 있으나.
# unstack부터는 속수무책으로 이해가 안간다.
medal_tables = summer_new.groupby(['Year', 'Country', 'Medal']).Medal.count().unstack(fill_value = 0)[['Gold', 'Silver', 'Bronze']]
'''
Medal Bronze Gold Silver
Year Country
1896 AUS 0 2 0
AUT 2 2 1
DEN 3 1 2
FRA 2 5 4
GBR 2 2 3
... ... ... ... ...
2012 UGA 0 1 0
UKR 9 6 4
USA 32 46 28
UZB 2 1 0
VEN 0 1 0
1157 rows × 3 columns
'''
## Step 7: Comparison with Wikipedia Medal Tables
medal_tables.head()
'''
Medal Gold Silver Bronze
Year Country
1896 AUS 2 0 0
AUT 2 1 2
DEN 1 2 3
FRA 5 4 2
GBR 2 3 2
'''
medal_tables.loc[1976]
'''
Medal Gold Silver Bronze
Country
AUS 0 1 4
AUT 0 0 1
BEL 0 3 3
BER 0 0 1
BRA 0 0 2
BUL 6 9 7
CAN 0 5 6
CUB 6 4 3
DEN 1 0 2
ESP 0 2 0
FIN 4 2 0
FRA 2 3 4
FRG 12 12 17
GBR 3 5 5
GDR 40 25 25
HUN 4 5 13
IRI 0 1 1
ITA 2 7 4
JAM 1 1 0
JPN 9 6 10
KOR 1 1 4
MEX 1 0 1
MGL 0 1 0
NED 0 2 3
NOR 1 1 0
NZL 2 1 1
PAK 0 0 1
POL 7 6 13
POR 0 2 0
PRK 1 1 0
PUR 0 0 1
ROU 4 9 14
SUI 1 2 2
SWE 4 1 0
TCH 2 2 4
THA 0 0 1
TRI 1 0 0
URS 49 41 35
USA 35 35 25
VEN 0 1 0
YUG 2 3 3
'''
agg_1976 = medal_tables.loc[1976].sort_values(['Gold', 'Silver', 'Bronze'], ascending = False).copy()
agg_1976.head()
'''
Medal Gold Silver Bronze
Country
URS 49 41 35
GDR 40 25 25
USA 35 35 25
FRG 12 12 17
JPN 9 6 10
'''
wik_1976.head()
'''
Gold Silver Bronze
Country
URS 49 41 35
GDR 40 25 25
USA 34 35 25
FRG 10 12 17
JPN 9 6 10
'''
# 결측값 드랍
# 결측값 제거
div_76 = agg_1976.sub(wik_1976).abs().dropna()
'''
Medal Gold Silver Bronze
Country
41 NOCs NaN NaN NaN
AUS 0.0 0.0 0.0
AUT 0.0 0.0 0.0
BEL 0.0 0.0 0.0
BER 0.0 0.0 0.0
BRA 0.0 0.0 0.0
BUL 0.0 0.0 0.0
CAN 0.0 0.0 0.0
CUB 0.0 0.0 0.0
DEN 0.0 0.0 0.0
ESP 0.0 0.0 0.0
FIN 0.0 0.0 0.0
FRA 0.0 0.0 0.0
FRG 2.0 0.0 0.0
GBR 0.0 0.0 0.0
GDR 0.0 0.0 0.0
HUN 0.0 0.0 0.0
IRI 0.0 0.0 0.0
ITA 0.0 0.0 0.0
JAM 0.0 0.0 0.0
JPN 0.0 0.0 0.0
KOR 0.0 0.0 0.0
MEX 0.0 0.0 0.0
MGL 0.0 0.0 0.0
NED 0.0 0.0 0.0
NOR 0.0 0.0 0.0
NZL 0.0 0.0 0.0
PAK 0.0 0.0 0.0
POL 0.0 0.0 0.0
POR 0.0 0.0 0.0
PRK 0.0 0.0 0.0
PUR 0.0 0.0 0.0
ROU 0.0 0.0 0.0
SUI 0.0 1.0 0.0
SWE 0.0 0.0 0.0
TCH 0.0 0.0 0.0
THA 0.0 0.0 0.0
TRI 0.0 0.0 0.0
URS 0.0 0.0 0.0
USA 1.0 0.0 0.0
VEN 0.0 0.0 0.0
YUG 0.0 0.0 0.0
'''
# 이중 sum
# 이런 표현방식은 참 신기하다.
score_76 = div_76.sum().sum()
# 4.0
agg_1996 = medal_tables.loc[1996].sort_values(['Gold', 'Silver', 'Bronze'], ascending = False).copy()
'''
Medal Gold Silver Bronze
Country
USA 44 36 27
RUS 26 21 16
GER 22 18 27
CHN 16 22 12
FRA 15 7 15
... ... ... ...
MGL 0 0 1
MOZ 0 0 1
PUR 0 0 1
TUN 0 0 1
UGA 0 0 1
79 rows × 3 columns
'''
agg_1996.head()
'''
Medal Gold Silver Bronze
Country
USA 44 36 27
RUS 26 21 16
GER 22 18 27
CHN 16 22 12
FRA 15 7 15
'''
wik_1996.head()
'''
Gold Silver Bronze
Country
USA 44 32 25
RUS 26 21 16
GER 20 18 27
CHN 16 22 12
FRA 15 7 15
'''
# 여기서 sub의 기능은 scala 방식으로 뺄셈을 해주는 것이다.
div_96 = agg_1996.sub(wik_1996).abs().dropna()
score_96 = div_96.sum().sum()
# 13.0
300x250
'개발일지 > 임시카테고리' 카테고리의 다른 글
SQL 기초4 IN, LIKE, ILIKE 사용방법 (0) | 2022.08.20 |
---|---|
SQL 기초 3 BETWEEN 및 타임스탬프(timestamp) ft. SQLD 자격증 응시 취소 (0) | 2022.08.20 |
pandas 판다스 기초 22 seaborn 시각화 그래프 (0) | 2022.08.19 |
pandas 판다스 틀린문제 11 비율에 따른 구간 나누기 qcut, map과 dictionary로 변경값을 새로운 컬럼에 저장 루틴 (0) | 2022.08.19 |
SQL 기초2 기본문법 사용 순서 및 PostgreSQL 리뷰 (0) | 2022.08.19 |