from sklearn.decomposition import NMF
# Factorize `dtm` with NMF into 15 non-negative components; each row of
# doc_emb is the 15-dimensional representation of the matching row of dtm.
# NOTE(review): dtm is presumably a document-term matrix built earlier - confirm.
nmf = NMF(n_components=15, random_state=1234, max_iter=500)
doc_emb = nmf.fit_transform(dtm)
# Inspect the embedding of the first row.
doc_emb[0]
array([0.04495691, 0.00993037, 0.00414172, 0. , 0.08992402,
0.01568343, 0. , 0.20330137, 0.00417763, 0. ,
0. , 0. , 0.10685757, 0. , 0.18545175])
# Sum of squared components (squared L2 norm) of the first raw embedding row.
(doc_emb[0] ** 2).sum()
0.09762898174810467
from sklearn.preprocessing import normalize
# Rescale the embeddings with normalize() using its default settings; the
# check in the next cell shows the first row ends up with unit squared norm.
norm_doc_emb = normalize(doc_emb)
# Inspect the first normalized row.
norm_doc_emb[0]
array([0.1438822 , 0.03178161, 0.01325536, 0. , 0.28779706,
0.05019399, 0. , 0.65065523, 0.0133703 , 0. ,
0. , 0. , 0.34199196, 0. , 0.59352848])
# Sanity check: the squared L2 norm of a normalized row should be ~1.
(norm_doc_emb[0] ** 2).sum()
0.9999999999999999
from sklearn.cluster import KMeans
# Sweep candidate cluster counts and keep every fitted model, keyed by k,
# so the model for the chosen k can be looked up later.
ks = list(range(2, 30))
kms = {}
for k in ks:
    km = KMeans(n_clusters=k, n_init='auto', random_state=1234)
    # fit() returns the estimator itself, so this stores the fitted model.
    kms[k] = km.fit(norm_doc_emb)
# Inertia of each fitted model, in the same order as ks (used for the plot).
inertia = [kms[k].inertia_ for k in ks]
import matplotlib.pyplot as plt
# Plot inertia against the number of clusters to look for an elbow.
plt.plot(ks, inertia)
[<matplotlib.lines.Line2D at 0x246c68391e0>]
# Number of clusters to analyse.
# NOTE(review): presumably picked by inspecting the inertia curve - confirm.
n_cluster = 7
# Reuse the model that was already fitted for this k during the sweep over ks.
km = kms[n_cluster]
# labels_ holds the cluster index assigned to each row of norm_doc_emb by that
# fit. Calling fit_predict() here would re-run k-means on the same data with
# the same fixed random_state only to reproduce these same labels.
cluster = km.labels_
cluster
array([2, 5, 1, 2, 2, 2, 3, 2, 5, 1, 0, 3, 5, 4, 0, 0, 1, 2, 1, 2, 2, 0,
0, 3, 0, 5, 3, 6, 5, 6, 3, 0, 2, 1, 5, 2, 3, 0, 2, 0, 0, 4, 2, 2,
0, 2, 6, 2, 5, 0, 1, 3, 4, 0, 5, 0, 3, 2, 1, 1, 2, 2, 1, 1, 1, 2,
2, 3, 5, 2, 6, 6, 2, 2, 4, 3, 2, 2, 2, 5, 2, 5, 2, 1, 2, 6, 3, 3,
2, 4, 5, 2, 4, 3, 2, 2, 1, 6, 1, 2, 1, 2, 3, 2, 5, 5, 4, 3, 3, 0,
5, 3, 2, 5, 5, 2, 1, 0, 3, 2, 2, 3, 1, 0, 3, 3, 1, 2, 0, 5, 5, 2,
1, 1, 0, 5, 4, 4, 2, 2, 2, 2, 2, 2, 0, 3, 1, 2, 3, 1, 5, 2, 3, 6,
5, 4, 6, 2, 5, 2, 5, 2, 0, 6, 0, 3, 1, 0, 0, 3, 5, 0, 2, 3, 0, 3,
2, 0, 0, 3, 0, 0, 4, 3, 3, 6, 3, 1, 2, 3, 2, 1, 5, 3, 6, 6, 5, 5,
2, 5, 3, 0, 0, 2, 6, 2, 3, 3, 0, 3, 2, 2, 1, 0, 5, 1, 2, 1, 5, 1,
2, 0, 5, 5, 2, 2, 5, 2, 4, 3, 5, 2, 3, 4, 4, 2, 5, 6, 0, 4, 2, 4,
4, 2, 0, 0, 2, 4, 0, 0, 2, 2, 2, 2, 6, 2, 2, 6, 5, 2, 2, 6, 4, 2,
3, 2, 5, 1, 4, 3, 0, 6, 3, 3, 2, 0, 0, 2, 4, 3, 5, 1, 3, 3, 6, 3,
4, 6, 2, 0, 3, 2, 4, 0, 6, 4, 0, 2, 6, 4, 4, 6, 0, 0, 0, 0, 4, 0,
0, 3, 0, 4, 0, 2, 4, 4, 2, 6, 5, 2, 2, 6, 4, 3, 2, 2, 0, 3, 4, 0,
6, 6, 2, 2, 0, 2, 6, 6, 4, 1, 1, 2, 4, 6, 4, 2, 2, 4, 4, 3, 6, 5,
4, 3, 2, 6, 3, 0, 6, 0, 2, 2, 2, 2, 2, 3, 0, 4, 3, 2, 5, 0, 3, 2,
2, 5, 4, 5, 6, 2, 1, 2, 2, 2, 5, 1, 6, 5, 6, 5, 5, 4, 3, 2, 6, 6,
0, 6, 6, 6, 2, 2, 3, 0, 6, 6, 6, 6, 6, 2, 6, 6, 2, 0, 5, 2, 3, 0,
6, 0, 6, 1, 6, 6, 2, 4, 5, 1, 2, 3, 5, 2, 1, 2, 2, 1, 2, 0, 5])
# Preview the first rows of df whose cluster label is 0 (boolean-mask select).
# NOTE(review): assumes df rows are in the same order as norm_doc_emb - confirm.
df[cluster == 0].head()
| status | ko_title | en_title | abstract |
---|
10 | 등록 | 탈모방지 및 발모촉진용 헤어샴푸 조성물과 이의 제조방법 | hair shampoo composition for preventing hair l... | 본 발명은 샴푸 전동 의자에 관한 것으로, 회전암대 회동수단; 일측이 회전암대 회동... |
---|
14 | 등록 | 영유아 샴푸 보조 기구 | SHAMPOO ASSIST DEVICE FOR INFANT AND TODDLER | 본 발명은 직립형 샴푸 캡에 관한 것으로, 사용자의 머리에 착용 가능하도록 형성되고... |
---|
15 | 등록 | 컨디셔닝 샴푸 조성물 | Conditioning shampoo composition | 본 발명의 실시예에 따른 영유아 샴푸 보조 기구는 아기의 머리를 지지하는 상단 돌출... |
---|
21 | 등록 | 샴푸 용기용 펌프 디스펜서 | PUMP DISPENSER FOR SHAMPOO VESSEL | 본 발명은 샴푸 용기용 펌프 디스펜서에 관한 것으로서, 승강수단은 상기 피스톤(23... |
---|
22 | 등록 | 석창포 추출물을 함유하는 발모촉진과 탈모방지 및 비듬방지를 위한 샴푸 조성물과 조성... | Acoris gramineus Sol shampoo composition and m... | 본 발명은 샴푸 용기용 펌프 디스펜서에 관한 것으로서, 승강수단은 상기 피스톤(23... |
---|
from sklearn.metrics import silhouette_samples, silhouette_score
# Overall silhouette score of the clustering on the normalized embeddings;
# reused below as the reference line in the per-cluster silhouette plot.
slh_avg = silhouette_score(norm_doc_emb, cluster)
slh_avg
0.2648528175529998
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np

# Per-sample silhouette values for the current cluster assignment.
slh_values = silhouette_samples(norm_doc_emb, cluster)

# Draw one horizontal band per cluster: the samples of a cluster are sorted by
# silhouette value and stacked along the y axis, with a 10-unit gap between
# consecutive bands.
y_lower = 10
for i in range(n_cluster):
    cluster_slh = np.sort(slh_values[cluster == i])
    size = len(cluster_slh)
    y_upper = y_lower + size
    color = cm.nipy_spectral(i / n_cluster)
    plt.fill_betweenx(
        np.arange(y_lower, y_upper),
        0,
        cluster_slh,
        facecolor=color,
        edgecolor=color,
        alpha=0.7,
    )
    # Label the band with its cluster index, vertically centred on the band.
    plt.text(-0.05, y_lower + size / 2, str(i))
    y_lower = y_upper + 10
# Dashed vertical line marking the average silhouette score.
plt.axvline(slh_avg, linestyle='--', color='gray')
# The y axis only encodes stacking order, so hide its ticks.
plt.yticks([])
([], [])