pip install lifelines
import pandas as pd
# Read the survival dataset from the Excel workbook and preview its first rows.
cancer = pd.read_excel(io='cancer_survive.xlsx')
cancer.head(n=5)
|   | type | time | delta |
|---|---|---|---|
| 0 | 1 | 1 | 1 |
| 1 | 1 | 3 | 1 |
| 2 | 1 | 3 | 1 |
| 3 | 1 | 4 | 1 |
| 4 | 1 | 10 | 1 |
# Count how many rows carry each value of the `type` column.
cancer['type'].value_counts()
1 52
2 28
Name: type, dtype: int64
import seaborn as sns
# Strip plot of `time`, with points coloured by the `delta` column.
sns.stripplot(x='time', hue='delta', data=cancer)
<Axes: xlabel='time'>
카플란 마이어 추정치
from lifelines import KaplanMeierFitter
# Fit one Kaplan-Meier estimator on all rows: `time` supplies the durations
# and `delta` the event-observed indicator.
kmf = KaplanMeierFitter()
kmf.fit(durations=cancer['time'], event_observed=cancer['delta'])
<lifelines.KaplanMeierFitter:"KM_estimate", fitted with 80 total observations, 27 right-censored observations>
# Fitted survival-function estimate. This is a bare expression, so it is only
# displayed when an interactive session echoes it.
kmf.survival_function_
# Draw the estimated survival curve for the pooled fit.
kmf.plot_survival_function()
<Axes: xlabel='timeline'>
# Kaplan-Meier fit restricted to the rows where `type` is 1.
cancer1 = cancer.query('type == 1')
kmf1 = KaplanMeierFitter()
kmf1.fit(
    durations=cancer1['time'],
    event_observed=cancer1['delta'],
    label='type 1',
)
<lifelines.KaplanMeierFitter:"type 1", fitted with 52 total observations, 21 right-censored observations>
# Kaplan-Meier fit restricted to the rows where `type` is 2.
cancer2 = cancer.query('type == 2')
kmf2 = KaplanMeierFitter()
kmf2.fit(
    durations=cancer2['time'],
    event_observed=cancer2['delta'],
    label='type 2',
)
<lifelines.KaplanMeierFitter:"type 2", fitted with 28 total observations, 6 right-censored observations>
# Overlay both groups' Kaplan-Meier curves on one set of axes.
# Use the fitter's own plotting method (as the pooled plot does) so each
# estimate is drawn as a step function; calling DataFrame.plot on
# survival_function_ joins the estimate points with straight lines instead.
# NOTE(review): lifelines shades confidence intervals by default here; pass
# ci_show=False to both calls if only the curves are wanted.
ax = kmf1.plot_survival_function()
kmf2.plot_survival_function(ax=ax)
<Axes: xlabel='timeline'>
생존 함수 차이의 통계적 가설 검정
from lifelines.statistics import logrank_test
# Log-rank test comparing the survival experience of the two groups.
# NOTE(review): `alpha` is echoed in the printed summary; confirm against the
# lifelines documentation whether it influences the test itself.
res = logrank_test(
    durations_A=cancer1['time'],
    durations_B=cancer2['time'],
    event_observed_A=cancer1['delta'],
    event_observed_B=cancer2['delta'],
    alpha=.95,
)
res.print_summary()
|   |   |
|---|---|
| t_0 | -1 |
| null_distribution | chi squared |
| degrees_of_freedom | 1 |
| alpha | 0.95 |
| test_name | logrank_test |

|   | test_statistic | p | -log2(p) |
|---|---|---|---|
| 0 | 2.79 | 0.09 | 3.40 |
넬슨 알렌 추정치
from lifelines import NelsonAalenFitter
# Nelson-Aalen fit for the `type == 1` subset, followed by its default plot.
naf1 = NelsonAalenFitter().fit(
    durations=cancer1['time'],
    event_observed=cancer1['delta'],
    label='type 1',
)
naf1.plot()
<Axes: xlabel='timeline'>