pandas 판다스 기초11 slice, upper, lower, title등 Series의 경우에 작동하는 문법

# 원하는 결과값은 전체 출력이다.
# 전체출력은 원래 그냥 변수만 입력해주면 끝나지만,
# 다중 인덱스를 슬라이스해서 전체출력을 해주는 것이 유의미 할것같아서 시도해보고 싶었다.

# 시도 1
# titanic.loc[(slice(all), slice(all)), :]
# TypeError: '<' not supported between instances of 'builtin_function_or_method' and 'builtin_function_or_method'

# 시도 2
titanic.loc[(slice(None), slice(None)),:]

'''
	survived	age	sibsp	parch	fare	embarked	deck
pclass	sex							
1	female	1	38.0	1	0	71.2833	C	C
    female	1	35.0	1	0	53.1000	S	C
    female	1	58.0	0	0	26.5500	S	C
    female	1	NaN	1	0	146.5208	C	B
    male	0	54.0	0	0	51.8625	S	E
...	...	...	...	...	...	...	...	...
3	male	0	21.0	0	0	8.0500	S	NaN
    male	0	NaN	0	0	7.8958	C	NaN
    male	0	NaN	0	0	8.0500	S	NaN
    male	0	NaN	1	0	15.5000	Q	NaN
    male	0	NaN	2	0	21.6792	C	NaN
50 rows × 7 columns

1

'''

# apply(), map() and applymap()

# 먼저 짚고 넘어갈것이 있다.
# apply는 Series(원소 단위)와 DataFrame(행/열 단위, axis로 지정) 모두에서 사용할 수 있고,
# map은 Series에서 원소 단위로, applymap은 DataFrame에서 원소 단위로 함수를 적용한다.

import pandas as pd
sales = pd.read_csv('sales.csv', index_col= 0)
sales

'''
	Mon	Tue	Wed	Thu	Fri
Steven	34	27	15	NaN	33
Mike	45	9	74	87.0	12
Andi	17	33	54	8.0	29
Paul	87	67	27	45.0	7
'''

sales.info()

'''
<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, Steven to Paul
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Mon     4 non-null      int64  
 1   Tue     4 non-null      int64  
 2   Wed     4 non-null      int64  
 3   Thu     3 non-null      float64
 4   Fri     4 non-null      int64  
dtypes: float64(1), int64(4)
memory usage: 192.0+ bytes
'''

# axis = 0일 경우 각 '열'마다 (행 방향으로 내려가며) 계산한다.
sales.min(axis = 0)

'''
Mon    17.0
Tue     9.0
Wed    15.0
Thu     8.0
Fri     7.0
dtype: float64
'''

# axis = 1일 경우 각 '행'마다 (열 방향으로 가로질러) 계산한다.
sales.min(axis = 1)

'''
Steven    15.0
Mike       9.0
Andi       8.0
Paul       7.0
dtype: float64
'''

# 기본 내장 함수로는, 해당 행에 해당되는 사람이 일주일 동안 벌어들인 소득의 최대값과 최소값의 차이를 구하는 것이 없기때문에
# 이를 해결하기 위한 선언 함수를 만들어준다.

def range(series):
    """Return the spread of ``series``: its maximum minus its minimum.

    Written to be handed to ``DataFrame.apply`` so that every column (or
    row) is reduced to a single number.

    NOTE: the name shadows Python's built-in ``range`` from this point on.
    """
    highest = series.max()
    lowest = series.min()
    return highest - lowest

sales.apply(range, axis = 0)

'''
Mon    70.0
Tue    58.0
Wed    59.0
Thu    79.0
Fri    26.0
dtype: float64
'''

sales.min(axis = 1)
'''
Steven    15.0
Mike       9.0
Andi       8.0
Paul       7.0
dtype: float64
'''

def range(series):
    """Compute the gap between the largest and smallest value in ``series``.

    Re-declared here so it can be passed to ``DataFrame.apply``.

    NOTE: the name shadows Python's built-in ``range`` from this point on.
    """
    upper, lower = series.max(), series.min()
    return upper - lower
    
sales.apply(range, axis = 1)

'''
Steven    19.0
Mike      78.0
Andi      46.0
Paul      80.0
dtype: float64
'''

sales.apply(lambda series: series.max() - series.min(), axis = 1)

'''
Steven    19.0
Mike      78.0
Andi      46.0
Paul      80.0
dtype: float64
'''

# 결과적으로 봤을때 선언 함수를 이렇게 써주는 것과 lambda의 구성 원리와 결과는 같다.

# 1 선언 함수 케이스

'''
def range(series):
    return series.max() - series.min()

sales.apply(range, axis = 1)
'''

# 2 lambda 케이스

sales.apply(lambda series: series.max() - series.min())

'''
한두 번 쓰고 말 거면 lambda가 더 낫다고 볼 수 있으나,
횟수가 거듭될수록 선언 함수가 더욱 효율적이라고 할 수 있다.
다만 선언 함수는 내 기준에서 좀 더 직관적으로 이해가 가지는 않는다.
'''

summer = pd.read_csv('summer.csv')
summer.head()

'''
	Year	City	Sport	Discipline	Athlete	Country	Gender	Event	Medal
0	1896	Athens	Aquatics	Swimming	HAJOS, Alfred	HUN	Men	100M Freestyle	Gold
...	...	...	...	...	...	...	...	...	...
4	1896	Athens	Aquatics	Swimming	CHASAPIS, Spiridon	GRE	Men	100M Freestyle For Sailors	Silver
5 rows × 9 columns
'''

# Athlete 열에 들어있는 각 행의 첫번째 글자 표기
summer.Athlete.apply(lambda x: x[0])

'''
0        H
        ..
31164    L
Name: Athlete, Length: 31165, dtype: object
'''

summer.Athlete.map(lambda x: x[0])

'''
0        H
        ..
31164    L
Name: Athlete, Length: 31165, dtype: object
'''

summer.iloc[:,1:3].applymap(lambda x: x[0])

'''
	City	Sport
0	A	A
...	...	...
31164	L	W
31165 rows × 2 columns
'''

# 판매 이익률을 잡을때 이익률을 40%로 잡았다고 가정하면,
# 판매액에 40%를 곱해주고, 영업관리비용으로 고정적으로 지출되는 5원을 제외하고 생각해준다. 라는 개념이다.
sales.applymap(lambda x: x*0.4-5)

'''
Mon	Tue	Wed	Thu	Fri
Steven	8.6	5.8	1.0	NaN	8.2
...	...	...	...	...	...
Paul	29.8	21.8	5.8	13.0	-2.2
4 rows × 5 columns
'''

sales

'''

Mon	Tue	Wed	Thu	Fri
Steven	34	27	15	NaN	33
...	...	...	...	...	...
Paul	87	67	27	45.0	7
4 rows × 5 columns
'''

# 기본적으로 sales의 모습은 이러하며

sales*0.4-5

'''
	Mon	Tue	Wed	Thu	Fri
Steven	8.6	5.8	1.0	NaN	8.2
...	...	...	...	...	...
Paul	29.8	21.8	5.8	13.0	-2.2
4 rows × 5 columns
'''
# 이것도 위의 lambda를 쓴것과 같은 결과가 나오게 된다.

# Hierarchical Indexing (MultiIndex) Intro
# 계층적 인덱싱(멀티 인덱싱)

import pandas as pd
titanic = pd.read_csv('titanic.csv')
titanic.head()

'''
	survived	pclass	sex	age	sibsp	parch	fare	embarked	deck
0	0	3	male	22.0	1	0	7.2500	S	NaN
1	1	1	female	38.0	1	0	71.2833	C	C
2	1	3	female	26.0	0	0	7.9250	S	NaN
3	1	1	female	35.0	1	0	53.1000	S	C
4	0	3	male	35.0	0	0	8.0500	S	NaN
'''

# 50개의 행에 대해서만 titanic으로 재 변수처리
titanic = titanic.iloc[:50, :]

titanic.set_index('pclass')

'''
	survived	sex	age	sibsp	parch	fare	embarked	deck
pclass								
3	0	male	22.0	1	0	7.2500	S	NaN
1	1	female	38.0	1	0	71.2833	C	C
...	...	...	...	...	...	...	...	...
3	0	male	NaN	2	0	21.6792	C	NaN
3	0	female	18.0	1	0	17.8000	S	NaN
50 rows × 8 columns
'''

# set_index를 다중으로 잡아줄 경우
titanic.set_index(['pclass', 'sex'], inplace=True)

'''
	survived	age	sibsp	parch	fare	embarked	deck
pclass	sex							
3	male	0	22.0	1	0	7.2500	S	NaN
1	female	1	38.0	1	0	71.2833	C	C
...	...	...	...	...	...	...	...	...
3	male	0	NaN	2	0	21.6792	C	NaN
female	0	18.0	1	0	17.8000	S	NaN
50 rows × 7 columns
'''

titanic.sort_index()

'''
	survived	age	sibsp	parch	fare	embarked	deck
pclass	sex							
1	female	1	38.0	1	0	71.2833	C	C
female	1	35.0	1	0	53.1000	S	C
...	...	...	...	...	...	...	...	...
3	male	0	NaN	1	0	15.5000	Q	NaN
male	0	NaN	2	0	21.6792	C	NaN
50 rows × 7 columns
'''

# 이 부분은 아주 흥미롭다.
# 기존에 기준하여 잡은 행이 pclass, sex 이렇게 2개의 컬럼으로 잡았다.
# 이 경우에 ascending 정렬을 둘다 동일하게 먹일경우 기존에 사용하던대로
# titanic.sort_index(ascending=True) or False로 하면 되지만
# 만약 각각 다른 정렬 기준을 적용하고자 한다면, pclass, sex의 순서일 경우
# pclass가 오름차순(True)을 원한다면 True, sex가 내림차순(False)를 원할 경우 이렇게 한다.

titanic.sort_index(ascending=[True, False])

'''
	survived	age	sibsp	parch	fare	embarked	deck
pclass	sex							
1	male	0	54.0	0	0	51.8625	S	E
male	1	28.0	0	0	35.5000	S	A
...	...	...	...	...	...	...	...	...
3	female	1	NaN	0	0	7.7500	Q	NaN
female	0	18.0	1	0	17.8000	S	NaN
50 rows × 7 columns
'''

titanic.sort_index(ascending=[True, True], inplace=True)

# 인덱스 레벨(pclass, sex)의 순서를 서로 바꿔주는 것이다.
titanic.swaplevel()

'''
    survived	age	sibsp	parch	fare	embarked	deck
sex	pclass							
female	1	1	38.0	1	0	71.2833	C	C
1	1	35.0	1	0	53.1000	S	C
...	...	...	...	...	...	...	...	...
male	3	0	NaN	1	0	15.5000	Q	NaN
3	0	NaN	2	0	21.6792	C	NaN
50 rows × 7 columns
'''

# 만약, swap한대로 저장하고 싶다면
# 새 변수명 = 기존변수.swaplevel()
# 이라고 해주어야한다.
# 이유는 swaplevel의 경우에, inplace = True 기능이 없기 때문이다.

titanic.reset_index()

'''
	pclass	sex	survived	age	sibsp	parch	fare	embarked	deck
0	1	female	1	38.0	1	0	71.2833	C	C
1	1	female	1	35.0	1	0	53.1000	S	C
...	...	...	...	...	...	...	...	...	...
48	3	male	0	NaN	1	0	15.5000	Q	NaN
49	3	male	0	NaN	2	0	21.6792	C	NaN
50 rows × 9 columns
'''

# Hierarchical Indexing (MultiIndex) Part 2

import pandas as pd

titanic = pd.read_csv('titanic.csv')
titanic = titanic.iloc[:50,]
titanic

'''
	survived	pclass	sex	age	sibsp	parch	fare	embarked	deck
0	0	3	male	22.0	1	0	7.2500	S	NaN
1	1	1	female	38.0	1	0	71.2833	C	C
...	...	...	...	...	...	...	...	...	...
48	0	3	male	NaN	2	0	21.6792	C	NaN
49	0	3	female	18.0	1	0	17.8000	S	NaN
50 rows × 9 columns
'''

titanic.loc[1]

'''
survived    1
pclass      1
           ..
embarked    C
deck        C
Name: 1, Length: 9, dtype: object
'''

# 이것 자체로는 변경이 안된다.
# titanic.set_index(['pclass', 'sex']).sort_index(ascending=True)

# 이렇게 해줘야 변경이 된다
titanic = titanic.set_index(['pclass', 'sex']).sort_index(ascending=True)
titanic

'''
	survived	age	sibsp	parch	fare	embarked	deck
pclass	sex							
1	female	1	38.0	1	0	71.2833	C	C
female	1	35.0	1	0	53.1000	S	C
...	...	...	...	...	...	...	...	...
3	male	0	NaN	1	0	15.5000	Q	NaN
male	0	NaN	2	0	21.6792	C	NaN
50 rows × 7 columns
'''

# pclass의 1에 해당하는 값만 선택
titanic.loc[1]

# pclass가 1이나 2에 해당하는 값을 리턴
titanic.loc[[1,2]]

# 1차 인덱스 중에서는 1에 해당되는 값 중에서 2차 인덱스는 female의 값을 가지는 값
titanic.loc[1,'female']

'''
    survived	age	sibsp	parch	fare	embarked	deck
pclass	sex							
1	female	1	38.0	1	0	71.2833	C	C
female	1	35.0	1	0	53.1000	S	C
female	1	58.0	0	0	26.5500	S	C
female	1	NaN	1	0	146.5208	C	B
'''

# 첫번째 인덱스가 1에 해당하고, 두번째 인덱스가 female인것중에서 age만을 출력해주기
titanic.loc[(1,'female'), 'age']

'''
pclass  sex   
1       female    38.0
        female    35.0
        female    58.0
        female     NaN
Name: age, dtype: float64
'''

titanic.loc[(1,'female')]

'''
    survived	age	sibsp	parch	fare	embarked	deck
pclass	sex							
1	female	1	38.0	1	0	71.2833	C	C
    female	1	35.0	1	0	53.1000	S	C
    female	1	58.0	0	0	26.5500	S	C
    female	1	NaN	1	0	146.5208	C	B
'''

# 만약 기준 인덱스값으로 잡아놓은것 중에서 특정값을 선택하고, 그 값에 해당하는 컬럼만 출력을 요구하는데, 2개 이상일경우는 이렇게 한다.
titanic.loc[(1,'male'), ['age', 'fare']]

'''
		age	fare
pclass	sex		
1	male	54.0	51.8625
    male	28.0	35.5000
    male	19.0	263.0000
    male	40.0	27.7208
    male	28.0	82.1708
    male	42.0	52.0000
'''

# 기준인덱스를 설정해주고, 그것에 해당하는 모든 컬럼값을 출력할 경우는

titanic.loc[(2,'female'), :]

'''
        survived	age	sibsp	parch	fare	embarked	deck
pclass	sex							
2	female	1	14.0	1	0	30.0708	C	NaN
    female	1	55.0	0	0	16.0000	S	NaN
    female	0	27.0	1	0	21.0000	S	NaN
    female	1	3.0	1	2	41.5792	C	NaN
'''

# 이부분은 정말 전혀 생각지도 못했던 부분이다.
# 내가 원하는 행동은 첫번째 기준인덱스 전부와 두번째 기준인덱스는 female에 해당하는값의 모든 열을 출력한다.라고 할 때

# 시도1
# titanic.loc[('female'), :]
# KeyError: 'female'

# 시도2
# titanic.loc[(:,'female'),:]
# SyntaxError: invalid syntax

# 시도3
# titanic.loc[(:,'female')]
# SyntaxError: invalid syntax

# Working solution

# slice(None) is the programmatic spelling of a bare ":" -- it keeps every
# value of the first index level (pclass).
# For the second level (sex) pass the label itself. slice('female') would
# mean "every label up to and including 'female'", which only matched the
# female rows because 'female' happens to sort before 'male'.
# This tuple-of-selectors form is the usual way to query a MultiIndex.
titanic.loc[(slice(None), 'female'), :]

'''
    survived	age	sibsp	parch	fare	embarked	deck
pclass	sex							
1	female	1	38.0	1	0	71.2833	C	C
    female	1	35.0	1	0	53.1000	S	C
    female	1	58.0	0	0	26.5500	S	C
    female	1	NaN	1	0	146.5208	C	B
2	female	1	14.0	1	0	30.0708	C	NaN
    ...	...	...	...	...	...	...	...	...
3	female	1	14.0	1	0	11.2417	C	NaN
    female	0	40.0	1	0	9.4750	S	NaN
    female	1	19.0	0	0	7.8792	Q	NaN
    female	1	NaN	0	0	7.7500	Q	NaN
    female	0	18.0	1	0	17.8000	S	NaN
25 rows × 7 columns
'''

# String Operations Intro / Refresher

type('Hello World')
# str

hello = 'Hello world'
hello
# 'Hello world'

# 문자열 소문자 만들기
hello.lower()
# 'hello world'

# 문자열 대문자 만들기
hello.upper()

# 'HELLO WORLD'

# 각 문자열 문장의 첫번째 글씨를 대문자로 전환
hello.title()

# 'Hello World'

# 공백을 기준으로 분리하여 각각의 리스트로 만들어 주는 기능
hello.split(' ')

# ['Hello', 'world']

# 내가 지정한 문자를 다른 문자로 바꿔주는 기능
hello.replace('Hello', 'Hi')
# 'Hi world'

# 그렇지만 원본 문자열이 바뀌는 것은 아니고, 바뀐 내용의 새 문자열이 반환되는 것이다.
# 파이썬의 str은 불변(immutable) 객체라서 inplace = True 같은 기능도 없으니
# 진정 변경을 원한다면
# hello = hello.replace('Hello', 'Hi') 로 결과를 다시 대입해야 한다.
# 문자열은 불변이므로 별도의 복사가 필요 없다 (str에는 .copy 메서드도 없다).

import pandas as pd
summer = pd.read_csv('summer.csv')
summer.head()

'''

    Year	City	Sport	Discipline	Athlete	Country	Gender	Event	Medal
0	1896	Athens	Aquatics	Swimming	HAJOS, Alfred	HUN	Men	100M Freestyle	Gold
1	1896	Athens	Aquatics	Swimming	HERSCHMANN, Otto	AUT	Men	100M Freestyle	Silver
2	1896	Athens	Aquatics	Swimming	DRIVAS, Dimitrios	GRE	Men	100M Freestyle For Sailors	Bronze
3	1896	Athens	Aquatics	Swimming	MALOKINIS, Ioannis	GRE	Men	100M Freestyle For Sailors	Gold
4	1896	Athens	Aquatics	Swimming	CHASAPIS, Spiridon	GRE	Men	100M Freestyle For Sailors	Silver
'''

names = summer.loc[:9, 'Athlete'].copy()


'''
0            HAJOS, Alfred
1         HERSCHMANN, Otto
2        DRIVAS, Dimitrios
3       MALOKINIS, Ioannis
4       CHASAPIS, Spiridon
5    CHOROPHAS, Efstathios
6            HAJOS, Alfred
7         ANDREOU, Joannis
8    CHOROPHAS, Efstathios
9            NEUMANN, Paul
Name: Athlete, dtype: object
'''

names.dtypes
# dtype('O')

names[0]
# 'HAJOS, Alfred'

# Failed attempt (kept commented out, like the other failed attempts in
# these notes, so it does not stop the script):
# names.lower()
# AttributeError: 'Series' object has no attribute 'lower'
# names is a Series, and Series itself has no lower() method.

# Go through the .str accessor instead; it applies the string method to
# every element of the Series.
names.str.lower()
# This is said to be carried out as a vectorized operation.

'''
0            hajos, alfred
1         herschmann, otto
2        drivas, dimitrios
3       malokinis, ioannis
4       chasapis, spiridon
5    chorophas, efstathios
6            hajos, alfred
7         andreou, joannis
8    chorophas, efstathios
9            neumann, paul
Name: Athlete, dtype: object
'''

# String Operations in Pandas

import pandas as pd
summer = pd.read_csv('summer.csv')
summer.head()

'''

    Year	City	Sport	Discipline	Athlete	Country	Gender	Event	Medal
0	1896	Athens	Aquatics	Swimming	HAJOS, Alfred	HUN	Men	100M Freestyle	Gold
1	1896	Athens	Aquatics	Swimming	HERSCHMANN, Otto	AUT	Men	100M Freestyle	Silver
2	1896	Athens	Aquatics	Swimming	DRIVAS, Dimitrios	GRE	Men	100M Freestyle For Sailors	Bronze
3	1896	Athens	Aquatics	Swimming	MALOKINIS, Ioannis	GRE	Men	100M Freestyle For Sailors	Gold
4	1896	Athens	Aquatics	Swimming	CHASAPIS, Spiridon	GRE	Men	100M Freestyle For Sailors	Silver
'''

names = summer.Athlete[:9].copy()

names

'''
0            HAJOS, Alfred
1         HERSCHMANN, Otto
2        DRIVAS, Dimitrios
3       MALOKINIS, Ioannis
4       CHASAPIS, Spiridon
5    CHOROPHAS, Efstathios
6            HAJOS, Alfred
7         ANDREOU, Joannis
8    CHOROPHAS, Efstathios
Name: Athlete, dtype: object
'''

# 전체 다 대문자.
names.str.upper()

'''
0            HAJOS, ALFRED
1         HERSCHMANN, OTTO
2        DRIVAS, DIMITRIOS
3       MALOKINIS, IOANNIS
4       CHASAPIS, SPIRIDON
5    CHOROPHAS, EFSTATHIOS
6            HAJOS, ALFRED
7         ANDREOU, JOANNIS
8    CHOROPHAS, EFSTATHIOS
Name: Athlete, dtype: object
'''

# 전체 다 소문자.
names.str.lower()

'''
0            hajos, alfred
1         herschmann, otto
2        drivas, dimitrios
3       malokinis, ioannis
4       chasapis, spiridon
5    chorophas, efstathios
6            hajos, alfred
7         andreou, joannis
8    chorophas, efstathios
Name: Athlete, dtype: object
'''

# 각 문장의 첫번째 글씨만 대문자.
names.str.title()

'''
0            Hajos, Alfred
1         Herschmann, Otto
2        Drivas, Dimitrios
3       Malokinis, Ioannis
4       Chasapis, Spiridon
5    Chorophas, Efstathios
6            Hajos, Alfred
7         Andreou, Joannis
8    Chorophas, Efstathios
Name: Athlete, dtype: object
'''

summer.Event

'''
0                    100M Freestyle
1                    100M Freestyle
2        100M Freestyle For Sailors
3        100M Freestyle For Sailors
4        100M Freestyle For Sailors
                    ...            
31160                      Wg 84 KG
31161                      Wg 96 KG
31162                      Wg 96 KG
31163                      Wg 96 KG
31164                      Wg 96 KG
Name: Event, Length: 31165, dtype: object
'''

# 각 띄어쓰기마다 분리하여 리스트화 하기
summer.Event.str.split()
# 구분자 없이 split()을 쓰면 공백(연속된 공백 포함)을 기준으로 나누며, 공백이 하나씩만 있는 경우에는 split(' ')과 결과가 같다.

'''
0                      [100M, Freestyle]
1                      [100M, Freestyle]
2        [100M, Freestyle, For, Sailors]
3        [100M, Freestyle, For, Sailors]
4        [100M, Freestyle, For, Sailors]
                      ...               
31160                       [Wg, 84, KG]
31161                       [Wg, 96, KG]
31162                       [Wg, 96, KG]
31163                       [Wg, 96, KG]
31164                       [Wg, 96, KG]
Name: Event, Length: 31165, dtype: object
'''

# 이것은 생각해보니 in 혹은 not in을 사용하기 전에,
# 내용물을 확인하기 위한 용도로도 쓰일 수 있을것같다.

# summer.Event.str.contains
# <bound method StringMethods.contains of <pandas.core.strings.accessor.StringMethods object at 0x179ebf700>>

# in 과 같은 기능인것같다.
summer.Event.str.contains('100M')

'''
0         True
1         True
2         True
3         True
4         True
         ...  
31160    False
31161    False
31162    False
31163    False
31164    False
Name: Event, Length: 31165, dtype: bool
'''

# ' '는 공백을 기준으로 나누라는 뜻이고
# n은 왼쪽부터 시작하여 ' '처럼 주어진 기준으로 최대 n번까지만 분할한다는 의미를 가지고 있다.
# 즉, 띄어쓰기가 3개 있든 5개 있든 왼쪽부터 n번(아래는 n=1이므로 1번)만 나누고, 나머지는 하나의 문자열로 남아있는다.
summer.Event.str.split(' ', n=1)

'''
0                    [100M, Freestyle]
1                    [100M, Freestyle]
2        [100M, Freestyle For Sailors]
3        [100M, Freestyle For Sailors]
4        [100M, Freestyle For Sailors]
                     ...              
31160                      [Wg, 84 KG]
31161                      [Wg, 96 KG]
31162                      [Wg, 96 KG]
31163                      [Wg, 96 KG]
31164                      [Wg, 96 KG]
Name: Event, Length: 31165, dtype: object
'''

# 당연한거지만, 띄어쓰기가 원래 1개가 있는것은 그냥 1번 분할되고 끝이다.
summer.Event.str.split(' ', n=2)

'''
0                     [100M, Freestyle]
1                     [100M, Freestyle]
2        [100M, Freestyle, For Sailors]
3        [100M, Freestyle, For Sailors]
4        [100M, Freestyle, For Sailors]
                      ...              
31160                      [Wg, 84, KG]
31161                      [Wg, 96, KG]
31162                      [Wg, 96, KG]
31163                      [Wg, 96, KG]
31164                      [Wg, 96, KG]
Name: Event, Length: 31165, dtype: object
'''

# 파라미터를 ' '와 n까지만 써주었을때는 기존의 행을 리스트화 하여서 관리하였다.
# 하지만 expand라는 파라미터를 하나 더 추가해주고 True값으로 주게 되면
# 앞의 파라미터들을 통해서 나누어진 값들이 모두 제각각 DataFrame화 되게 된다.
summer.Event.str.split(' ', n = 2, expand = True)

'''
	0	1	2
0	100M	Freestyle	None
1	100M	Freestyle	None
2	100M	Freestyle	For Sailors
3	100M	Freestyle	For Sailors
4	100M	Freestyle	For Sailors
...	...	...	...
31160	Wg	84	KG
31161	Wg	96	KG
31162	Wg	96	KG
31163	Wg	96	KG
31164	Wg	96	KG
31165 rows × 3 columns
'''

'개발일지 > 임시카테고리' 카테고리의 다른 글

pandas 판다스 기초 12 Matplotlib (0)	2022.07.26
pandas 틀린부분 복기 5 apply, value_counts (0)	2022.07.25
pandas 틀린부분 복기 4 (0)	2022.07.23
pandas 판다스 rank, unique, nunique, count, 평균, 표준편차(mean, std), 상관계수 corr (0)	2022.07.23
Tableau 태블루 맵차트, 지역필터 (0)	2022.07.23

다니엘의 개발 이야기

pandas 판다스 기초11 slice, upper, lower, title등 Series의 경우에 작동하는 문법

'개발일지 > 임시카테고리' 카테고리의 다른 글

티스토리툴바

pandas 판다스 기초11 slice, upper, lower, title등 Series의 경우에 작동하는 문법

'개발일지 > 임시카테고리' 카테고리의 다른 글

관련글

티스토리툴바