%matplotlib inline
#git clone https://github.com/AllonKleinLab/scrublet.git
#cd scrublet
#sudo pip3 install -r requirements.txt
#sudo pip3 install --upgrade .
#sudo pip3 install pySankeyBeta
## Remember to use sudo and pip3 for the install commands above; otherwise the installation does not work.
import scrublet as scr
import scipy.io
import random
import scvelo as scv
import matplotlib.pyplot as plt
from matplotlib import rcParams
import matplotlib
import seaborn as sns
import numpy as np
import sys
from sklearn.manifold import TSNE
from sklearn.preprocessing import scale
from sklearn.decomposition import TruncatedSVD
from sklearn.cluster import SpectralClustering
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfTransformer
from pysankey import sankey
sys.path.append('/home/ubuntu/tools/python-genomics')
import Scanpyplus
from importlib import reload
from scanpy.tools._utils import get_init_pos_from_paga as init
import pandas as pd
import scanpy as sc
import anndata
import bbknn
import os
from scipy import sparse
# Notebook-wide scanpy / matplotlib settings.
# Verbosity 3 makes scanpy emit the detailed progress messages seen in the
# cell outputs further down.
sc.settings.verbosity = 3
sc.settings.set_figure_params(
    dpi=80,
    dpi_save=100,
    color_map='PuRd',
)
# Record the scanpy version and run date in the notebook output.
sc.logging.print_version_and_date()
# No figure has been drawn at this point in the notebook, so there is
# nothing for this call to render.
plt.show()
# Applied after set_figure_params so this figure size is the one that sticks
# (set_figure_params presumably adjusts rcParams too - keep this order).
matplotlib.rcParams['figure.figsize'] = (8, 8)
import pandasPlus
Running Scanpy 1.5.0, on 2020-06-26 14:21.
# Load the AnnData object to annotate (the file name suggests doublets were
# already filtered out upstream - confirm against the producing notebook).
rdata = sc.read(filename='../rdata_filter_out_doublet.h5ad')
# Cell-type annotation for each louvain cluster. Keys are the cluster ids as
# strings ('0' .. '17'); the position of a name in the tuple is its cluster id.
HumanDict = {
    str(cluster_id): cell_type
    for cluster_id, cell_type in enumerate((
        'Mesenchymal stem cell',   # 0
        'Muscle stem cell',        # 1
        'Chondrocyte',             # 2
        'Mesenchymal cell 1',      # 3
        'Mesenchymal cell 2',      # 4
        'Fibroblast',              # 5
        'Mesenchymal cell 3',      # 6
        'Tenocyte',                # 7
        'Muscle precursor cell',   # 8
        'Neurocyte',               # 9
        'Endothelial cell',        # 10
        'Osteoblast',              # 11
        'Immune cell',             # 12
        'Smooth muscle cell',      # 13
        'Epithelial cell 1',       # 14
        'Erythrocyte',             # 15
        'Epithelial cell 2',       # 16
        'Myocyte',                 # 17
    ))
}
# Translate every louvain cluster id into its annotated cell-type name and
# store the result as a new 'Celltype' column.
# rename_categories relabels the categories directly; Series.replace on a
# categorical column is deprecated in recent pandas for this purpose. Cluster
# ids that are missing from HumanDict keep their original label, and the
# category order is preserved.
# Assumes 'louvain' is a categorical column (the loaded object also carries
# 'louvain_colors', which scanpy stores for categorical annotations).
rdata.obs['Celltype'] = rdata.obs['louvain'].cat.rename_categories(HumanDict)
# Display the AnnData summary to confirm the new column is present.
rdata
AnnData object with n_obs × n_vars = 26944 × 33694 obs: 'batch', 'doublet', 'n_counts', 'n_genes', 'percent_mito', 'sample', 'time', 'louvain', 'dpt_pseudotime', 'leiden', 'S_score', 'G2M_score', 'phase', 'Celltype' var: 'n_cells-0', 'n_cells-1', 'n_cells-2', 'n_cells-3', 'n_cells-4', 'n_cells-5', 'n_cells-6' uns: 'leiden', 'leiden_colors', 'louvain', 'louvain_colors', 'neighbors', 'pca', 'sample_colors', 'time_colors' obsm: 'X_pca', 'X_umap', 'X_diffmap' obsp: 'distances', 'connectivities'
# Strip everything derived from the previous analysis before re-processing:
# the embeddings (.obsm), the neighbour graphs (.obsp) and the unstructured
# annotations (.uns) that were loaded with the file.
del rdata.obsm, rdata.obsp, rdata.uns
# The old cluster labels are no longer needed once 'Celltype' holds their
# names; the column is removed in place on the DataFrame owned by rdata.
rdata.obs.drop(columns=['louvain'], inplace=True)
# NOTE(review): Scanpyplus.Bertie is a project-local helper whose source is not
# visible here. Judging by its log output it re-processes the data once per
# 'sample' batch (filter, normalise, PCA, neighbours, repeated louvain
# clustering) - presumably for doublet scoring; confirm in Scanpyplus.
rdata = Scanpyplus.Bertie(rdata, batch_key='sample')
Trying to set attribute `.obs` of view, copying.
filtered out 15087 genes that are detected in less than 3 cells normalizing by total count per cell finished (0:00:00): normalized adata.X and added 'n_counts', counts per cell before normalization (adata.obs) extracting highly variable genes finished (0:00:01) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var) ... as `zero_center=True`, sparse input is densified and may lead to large memory consumption computing PCA on highly variable genes with n_comps=50
/usr/local/lib/python3.6/dist-packages/scanpy/preprocessing/_simple.py:848: UserWarning: Revieved a view of an AnnData. Making a copy. view_to_actual(adata)
finished (0:00:00) computing neighbors using 'X_pca' with n_pcs = 50 finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 14 clusters and added 'louvain', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 18 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 23 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 27 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 31 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 33 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 34 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 38 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 42 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 46 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running 
Louvain clustering using the "louvain" package of Traag (2017) finished: found 52 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 58 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 64 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 68 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 71 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00)
Trying to set attribute `.obs` of view, copying.
filtered out 16271 genes that are detected in less than 3 cells normalizing by total count per cell finished (0:00:00): normalized adata.X and added 'n_counts', counts per cell before normalization (adata.obs) extracting highly variable genes finished (0:00:01) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var) ... as `zero_center=True`, sparse input is densified and may lead to large memory consumption computing PCA on highly variable genes with n_comps=50
/usr/local/lib/python3.6/dist-packages/scanpy/preprocessing/_simple.py:848: UserWarning: Revieved a view of an AnnData. Making a copy. view_to_actual(adata)
finished (0:00:00) computing neighbors using 'X_pca' with n_pcs = 50 finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 17 clusters and added 'louvain', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 22 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 27 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 30 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 33 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 35 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 37 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 38 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 40 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 41 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running 
Louvain clustering using the "louvain" package of Traag (2017) finished: found 46 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 51 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 55 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 59 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 63 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 65 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 68 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 71 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00)
Trying to set attribute `.obs` of view, copying.
filtered out 15953 genes that are detected in less than 3 cells normalizing by total count per cell finished (0:00:00): normalized adata.X and added 'n_counts', counts per cell before normalization (adata.obs) extracting highly variable genes finished (0:00:01) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var) ... as `zero_center=True`, sparse input is densified and may lead to large memory consumption computing PCA on highly variable genes with n_comps=50
/usr/local/lib/python3.6/dist-packages/scanpy/preprocessing/_simple.py:848: UserWarning: Revieved a view of an AnnData. Making a copy. view_to_actual(adata)
finished (0:00:00) computing neighbors using 'X_pca' with n_pcs = 50 finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 19 clusters and added 'louvain', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 25 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 29 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 31 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 34 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 39 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 42 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 45 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 47 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 48 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running 
Louvain clustering using the "louvain" package of Traag (2017) finished: found 49 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 50 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 56 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 60 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 65 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 70 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 74 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 77 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 79 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 82 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00)
Trying to set attribute `.obs` of view, copying.
filtered out 15883 genes that are detected in less than 3 cells normalizing by total count per cell finished (0:00:00): normalized adata.X and added 'n_counts', counts per cell before normalization (adata.obs) extracting highly variable genes finished (0:00:01) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var) ... as `zero_center=True`, sparse input is densified and may lead to large memory consumption computing PCA on highly variable genes with n_comps=50
/usr/local/lib/python3.6/dist-packages/scanpy/preprocessing/_simple.py:848: UserWarning: Revieved a view of an AnnData. Making a copy. view_to_actual(adata)
finished (0:00:00) computing neighbors using 'X_pca' with n_pcs = 50 finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 22 clusters and added 'louvain', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 27 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 31 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 34 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 38 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 41 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 44 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 46 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 48 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 51 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running 
Louvain clustering using the "louvain" package of Traag (2017) finished: found 53 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 55 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 57 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 62 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 63 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 64 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 69 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 74 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 77 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 81 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 83 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) 
finished: found 87 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 91 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00)
Trying to set attribute `.obs` of view, copying.
filtered out 13659 genes that are detected in less than 3 cells normalizing by total count per cell finished (0:00:00): normalized adata.X and added 'n_counts', counts per cell before normalization (adata.obs) extracting highly variable genes finished (0:00:01) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var) ... as `zero_center=True`, sparse input is densified and may lead to large memory consumption computing PCA on highly variable genes with n_comps=50
/usr/local/lib/python3.6/dist-packages/scanpy/preprocessing/_simple.py:848: UserWarning: Revieved a view of an AnnData. Making a copy. view_to_actual(adata)
finished (0:00:00) computing neighbors using 'X_pca' with n_pcs = 50 finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 23 clusters and added 'louvain', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 30 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 36 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 40 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 44 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 49 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 52 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 55 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 58 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 60 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running 
Louvain clustering using the "louvain" package of Traag (2017) finished: found 62 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 65 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 67 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 73 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 75 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 76 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 76 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 81 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 88 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 92 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 96 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) 
finished: found 101 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 105 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 109 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00)
Trying to set attribute `.obs` of view, copying.
filtered out 13816 genes that are detected in less than 3 cells normalizing by total count per cell finished (0:00:00): normalized adata.X and added 'n_counts', counts per cell before normalization (adata.obs) extracting highly variable genes finished (0:00:01) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var) ... as `zero_center=True`, sparse input is densified and may lead to large memory consumption computing PCA on highly variable genes with n_comps=50
/usr/local/lib/python3.6/dist-packages/scanpy/preprocessing/_simple.py:848: UserWarning: Revieved a view of an AnnData. Making a copy. view_to_actual(adata)
finished (0:00:00) computing neighbors using 'X_pca' with n_pcs = 50 finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 17 clusters and added 'louvain', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 23 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 30 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 34 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 38 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 41 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 44 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 47 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 49 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 50 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running 
Louvain clustering using the "louvain" package of Traag (2017) finished: found 56 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 63 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 70 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 75 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 79 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 82 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 85 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 90 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00)
Trying to set attribute `.obs` of view, copying.
filtered out 13551 genes that are detected in less than 3 cells normalizing by total count per cell finished (0:00:00): normalized adata.X and added 'n_counts', counts per cell before normalization (adata.obs) extracting highly variable genes finished (0:00:01) --> added 'highly_variable', boolean vector (adata.var) 'means', float vector (adata.var) 'dispersions', float vector (adata.var) 'dispersions_norm', float vector (adata.var) ... as `zero_center=True`, sparse input is densified and may lead to large memory consumption computing PCA on highly variable genes with n_comps=50
/usr/local/lib/python3.6/dist-packages/scanpy/preprocessing/_simple.py:848: UserWarning: Revieved a view of an AnnData. Making a copy. view_to_actual(adata)
finished (0:00:00) computing neighbors using 'X_pca' with n_pcs = 50 finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 19 clusters and added 'louvain', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 26 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 32 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 37 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 40 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 44 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 49 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 52 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 54 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 56 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running 
Louvain clustering using the "louvain" package of Traag (2017) finished: found 58 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 60 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 67 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 72 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 78 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 85 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 89 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 93 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 97 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 100 clusters and added 'louvain_R', the cluster labels (adata.obs, categorical) (0:00:00)
# Quality filtering: discard genes detected in fewer than 3 cells, then
# filter cells with a minimum of 200 genes. Neither call's return value is
# used; `rdata` itself is filtered.
min_cells_per_gene = 3
min_genes_per_cell = 200
sc.pp.filter_genes(rdata, min_cells=min_cells_per_gene)
sc.pp.filter_cells(rdata, min_genes=min_genes_per_cell)
filtered out 10012 genes that are detected in less than 3 cells
# Normalise each cell by its total count (target: 1e4 counts per cell); the
# pre-normalisation totals are stored in rdata.obs['n_counts']. Then apply
# log1p to the normalised matrix.
sc.pp.normalize_per_cell(
    rdata,
    counts_per_cell_after=1e4,
    key_n_counts='n_counts',
)
sc.pp.log1p(rdata)
normalizing by total count per cell finished (0:00:00): normalized adata.X and added 'n_counts', counts per cell before normalization (adata.obs)
# Number of cells contributed by each sample.
cells_per_sample = rdata.obs['sample'].value_counts()
cells_per_sample
5386STDY7537944 4613 FCAImmP7536759 4403 FCAImmP7536758 4078 5478STDY7652318 3899 5386STDY7557337 3668 5386STDY7557336 3658 5386STDY7557335 2625 Name: sample, dtype: int64
# Flag highly variable genes (HVGs) separately within every sample and store
# one boolean column per sample, named 'highly_variable' + <sample id>, in
# rdata.var.
sc.settings.verbosity = 0  # silence scanpy logging while looping over samples
try:
    for key in rdata.obs['sample'].unique():
        # Explicit copy: highly_variable_genes writes to `.var`, and writing to
        # a view makes AnnData copy implicitly and emit
        # "Trying to set attribute `.var` of view, copying." for every sample.
        rdata_sample = rdata[rdata.obs['sample'] == key, :].copy()
        print(key)
        sc.pp.highly_variable_genes(rdata_sample, min_mean=0.0125, max_mean=3, min_disp=0.5)
        # Only cells were subset, so the gene names are shared with rdata;
        # membership by name yields a boolean array in rdata.var_names order.
        sample_hvg_names = rdata_sample.var_names[rdata_sample.var['highly_variable']]
        rdata.var['highly_variable' + key] = rdata.var_names.isin(sample_hvg_names)
        # Optional export of each sample's HVG list:
        # pd.DataFrame(rdata.var_names[rdata.var['highly_variable'+key]]).to_csv('200322Batch'+key+'HVG.csv',
        #                                                                         index=False,header=False)
finally:
    # Restore verbose logging even if processing one of the samples fails.
    sc.settings.verbosity = 3
FCAImmP7536758
Trying to set attribute `.var` of view, copying.
5386STDY7537944
Trying to set attribute `.var` of view, copying.
FCAImmP7536759
Trying to set attribute `.var` of view, copying.
5386STDY7557335
Trying to set attribute `.var` of view, copying.
5386STDY7557336
Trying to set attribute `.var` of view, copying.
5386STDY7557337
Trying to set attribute `.var` of view, copying.
5478STDY7652318
Trying to set attribute `.var` of view, copying.
# Display the AnnData summary to check that the per-sample
# 'highly_variable<sample id>' columns are now present in .var.
rdata
AnnData object with n_obs × n_vars = 26944 × 23682 obs: 'batch', 'doublet', 'n_counts', 'n_genes', 'percent_mito', 'sample', 'time', 'dpt_pseudotime', 'leiden', 'S_score', 'G2M_score', 'phase', 'Celltype', 'doublet_scores', 'bh_pval' var: 'n_cells-0', 'n_cells-1', 'n_cells-2', 'n_cells-3', 'n_cells-4', 'n_cells-5', 'n_cells-6', 'n_cells', 'highly_variableFCAImmP7536758', 'highly_variable5386STDY7537944', 'highly_variableFCAImmP7536759', 'highly_variable5386STDY7557335', 'highly_variable5386STDY7557336', 'highly_variable5386STDY7557337', 'highly_variable5478STDY7652318' uns: 'log1p'
# For every gene, count in how many samples it was flagged as highly variable.
# The per-sample flag columns are named 'highly_variable' + <sample id>, so the
# column list is derived from the sample labels instead of being hard-coded
# (a hard-coded list silently goes stale when the set of samples changes).
hvg_flag_columns = ['highly_variable' + key for key in rdata.obs['sample'].unique()]
# Sum the boolean flags row-wise; the final cast keeps the int32 dtype that the
# column had when it was built by repeated int32 addition.
rdata.var['highly_variable_n'] = (
    rdata.var[hvg_flag_columns].astype('int32').sum(axis=1).astype('int32')
)
# Distribution of "number of samples in which the gene is an HVG".
rdata.var['highly_variable_n'].value_counts()
0 18414 1 2285 2 864 3 523 6 466 7 405 4 382 5 343 Name: highly_variable_n, dtype: int64
# Build the object used for embedding and clustering from the genes that are
# highly variable in more than one sample (highly_variable_n > 1).
# .copy() makes bdata an independent AnnData up front: assigning to `.var` of
# a view would otherwise trigger an implicit copy together with the warning
# "Trying to set attribute `.var` of view, copying."
bdata = rdata[:, rdata.var['highly_variable_n'] > 1].copy()
bdata.var['highly_variable'] = True  # mark every retained gene as highly variable
sc.pp.scale(bdata)
sc.tl.pca(bdata, svd_solver='arpack')
sc.pp.neighbors(bdata, n_neighbors=10, n_pcs=50)
sc.tl.umap(bdata)
sc.tl.louvain(bdata)
#sc.tl.tsne(bdata)
sc.tl.leiden(bdata)
Trying to set attribute `.var` of view, copying.
... as `zero_center=True`, sparse input is densified and may lead to large memory consumption computing PCA on highly variable genes with n_comps=50 finished (0:00:04) computing neighbors using 'X_pca' with n_pcs = 50 finished: added to `.uns['neighbors']` `.obsp['distances']`, distances for each pair of neighbors `.obsp['connectivities']`, weighted adjacency matrix (0:00:04) computing UMAP finished: added 'X_umap', UMAP coordinates (adata.obsm) (0:00:22) running Louvain clustering using the "louvain" package of Traag (2017) finished: found 29 clusters and added 'louvain', the cluster labels (adata.obs, categorical) (0:00:02) running Leiden clustering finished: found 36 clusters and added 'leiden', the cluster labels (adata.obs, categorical) (0:00:04)
# Carry the cluster labels and the UMAP embedding computed on bdata back onto
# rdata, which still holds all genes, so any gene can be plotted on the UMAP.
for cluster_key in ('louvain', 'leiden'):
    rdata.obs[cluster_key] = bdata.obs[cluster_key]
rdata.obsm['X_umap'] = bdata.obsm['X_umap']
# UMAP panels: per-cell annotations first, then expression of individual genes.
annotation_keys = ['leiden', 'sample', 'n_counts', 'time', 'bh_pval']
gene_keys = [
    'PAX3', 'PAX7', 'MYOG', 'MYBPH', 'SOX9', 'COL2A1', 'PECAM1', 'GJB6',
    'KRT19', 'HBA2', 'HBZ', 'TWIST2', 'FCER1G', 'EDNRB', 'COL8A1', 'ACTA2',
]
sc.pl.umap(rdata, color=annotation_keys + gene_keys, color_map='jet')