{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Alteration of intercellular signaling in ulcerative colitis" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Analysising single cell RNA-seq data for inter and intracellular interactions. \n", "The first step is to import the singel cell RNA-seq data file. The import file is the cell cluster averaged gene expression files. OmniPath is accesed by the python client." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import igraph as ig\n", "import os\n", "from itertools import permutations\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\modos\\.conda\\envs\\Single_cell_RNA_seq\\lib\\site-packages\\requests\\__init__.py:91: RequestsDependencyWarning: urllib3 (1.26.2) or chardet (3.0.4) doesn't match a supported version!\n", " RequestsDependencyWarning)\n" ] } ], "source": [ "import omnipath as op" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First lets check the OmniPAth version and data." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.11.34\n", "Options(url='https://omnipathdb.org', license=, cache=, autoload=True, convert_dtypes=True, num_retries=3, timeout=5.0, chunk_size=8196)\n" ] } ], "source": [ "print(op.__server_version__)\n", "print(op.options)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we can donwload OmniPath intercellular network and see what kind of columns are in the intercellular dataframe. " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "intercell_network = op.interactions.import_intercell_network()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexsourcetargetis_stimulationis_inhibitionconsensus_directionconsensus_stimulationconsensus_inhibitiondip_urlcuration_effort...category_source_intercell_targetuniprot_intercell_targetgenesymbol_intercell_targetentity_type_intercell_targetconsensus_score_intercell_targettransmitter_intercell_targetreceiver_intercell_targetsecreted_intercell_targetplasma_membrane_transmembrane_intercell_targetplasma_membrane_peripheral_intercell_target
00P14416P48995TrueFalseTrueTrueFalsenan1...resource_specificP48995TRPC1protein1FalseTrueFalseFalseFalse
12P14416P48995TrueFalseTrueTrueFalsenan1...resource_specificP48995TRPC1protein3FalseTrueFalseFalseFalse
25P14416P48995TrueFalseTrueTrueFalsenan1...compositeP48995TRPC1protein3FalseTrueFalseFalseFalse
37Q13255P48995TrueFalseTrueTrueFalsenan1...resource_specificP48995TRPC1protein1FalseTrueFalseFalseFalse
49Q13255P48995TrueFalseTrueTrueFalsenan1...resource_specificP48995TRPC1protein3FalseTrueFalseFalseFalse
\n", "

5 rows × 46 columns

\n", "
" ], "text/plain": [ " index source target is_stimulation is_inhibition consensus_direction \\\n", "0 0 P14416 P48995 True False True \n", "1 2 P14416 P48995 True False True \n", "2 5 P14416 P48995 True False True \n", "3 7 Q13255 P48995 True False True \n", "4 9 Q13255 P48995 True False True \n", "\n", " consensus_stimulation consensus_inhibition dip_url curation_effort ... \\\n", "0 True False nan 1 ... \n", "1 True False nan 1 ... \n", "2 True False nan 1 ... \n", "3 True False nan 1 ... \n", "4 True False nan 1 ... \n", "\n", " category_source_intercell_target uniprot_intercell_target \\\n", "0 resource_specific P48995 \n", "1 resource_specific P48995 \n", "2 composite P48995 \n", "3 resource_specific P48995 \n", "4 resource_specific P48995 \n", "\n", " genesymbol_intercell_target entity_type_intercell_target \\\n", "0 TRPC1 protein \n", "1 TRPC1 protein \n", "2 TRPC1 protein \n", "3 TRPC1 protein \n", "4 TRPC1 protein \n", "\n", " consensus_score_intercell_target transmitter_intercell_target \\\n", "0 1 False \n", "1 3 False \n", "2 3 False \n", "3 1 False \n", "4 3 False \n", "\n", " receiver_intercell_target secreted_intercell_target \\\n", "0 True False \n", "1 True False \n", "2 True False \n", "3 True False \n", "4 True False \n", "\n", " plasma_membrane_transmembrane_intercell_target \\\n", "0 False \n", "1 False \n", "2 False \n", "3 False \n", "4 False \n", "\n", " plasma_membrane_peripheral_intercell_target \n", "0 False \n", "1 False \n", "2 False \n", "3 False \n", "4 False \n", "\n", "[5 rows x 46 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "intercell_network.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['index', 'source', 'target', 'is_stimulation', 'is_inhibition',\n", " 'consensus_direction', 'consensus_stimulation', 'consensus_inhibition',\n", " 'dip_url', 'curation_effort', 'references', 'sources',\n", " 'references_stripped', 'n_references', 'n_sources', 'n_primary_sources',\n", " 'category_intercell_source', 'parent_intercell_source',\n", " 'database_intercell_source', 'scope_intercell_source',\n", " 'aspect_intercell_source', 'category_source_intercell_source',\n", " 'uniprot_intercell_source', 'genesymbol_intercell_source',\n", " 'entity_type_intercell_source', 'consensus_score_intercell_source',\n", " 'transmitter_intercell_source', 'receiver_intercell_source',\n", " 'secreted_intercell_source',\n", " 'plasma_membrane_transmembrane_intercell_source',\n", " 'plasma_membrane_peripheral_intercell_source',\n", " 'category_intercell_target', 'parent_intercell_target',\n", " 'database_intercell_target', 'scope_intercell_target',\n", " 'aspect_intercell_target', 'category_source_intercell_target',\n", " 'uniprot_intercell_target', 'genesymbol_intercell_target',\n", " 'entity_type_intercell_target', 'consensus_score_intercell_target',\n", " 'transmitter_intercell_target', 'receiver_intercell_target',\n", " 'secreted_intercell_target',\n", " 'plasma_membrane_transmembrane_intercell_target',\n", " 'plasma_membrane_peripheral_intercell_target'],\n", " dtype='object')" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "intercell_network.columns" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(30265, 46)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "intercell_network.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here we can check what are the categories what we can use for intercellular interactions. We can use the intercellular interctions directly, hovewer it contain porteins which are invovled in the adhesion process and binds to the intracelluar domains of receptor proteins. That is why we suggest the fitlering as in the paper. " ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'desmosome', 'cell_surface_ligand', 'cell_surface_enzyme', 'secreted_receptor', 'ecm', 'receptor_regulator', 'adhesion', 'tight_junction', 'cell_adhesion', 'matrix_adhesion_regulator', 'cell_surface_peptidase', 'ecm_regulator', 'secreted_enzyme', 'ligand_regulator', 'gap_junction', 'ligand'}\n" ] } ], "source": [ "print (set(intercell_network[\"category_intercell_source\"]))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'adherens_junction', 'receptor', 'ion_channel_regulator', 'desmosome', 'adhesion', 'tight_junction', 'cell_adhesion', 'matrix_adhesion', 'transporter', 'ion_channel', 'gap_junction'}\n" ] } ], "source": [ "print (set(intercell_network[\"category_intercell_target\"]))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "filtered_source_types = [\"cell_surface_enzyme\",\"cell_surface_ligand\",\"ligand\",\"secreted_enzyme\",\"secreted_receptor\",\n", " \"adhesion\",\"cell_adhesion\",\"tight_junction\",\"cell_surface_peptidase\",\"gap_junction\",\"desmosome\"]\n", "filtered_target_types = [\"adhesion\",\"receptor\",\"cell_adhesion\",\"tight_junction\",\"gap_junction\",\"ion_channel\",\"transporter\",\n", " \"adherens_junction\"]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(20784, 46)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_intercell_network = intercell_network[intercell_network.category_intercell_source.isin(filtered_source_types)] \n", "filtered_intercell_network = filtered_intercell_network[filtered_intercell_network.\n", " category_intercell_target.isin(filtered_target_types)]\n", "filtered_intercell_network.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The next step is to preprocess the data files. The imput file is the cell type specfic expression. The data are from Smillie et al 2019 (https://pubmed.ncbi.nlm.nih.gov/31348891/). Each column is the average per cell type in transcript per million. From that we will build a simple cell-cell interaction count. It is based on expression tresholding. Our assumptation was that the interactions are important if they are exisit in any way, so we have chosen a realtievely low expression treshold. We invite the user for testing various tresholds in their own work. \n", "You will need for this the expression data which are in this example's folder." ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "df_cell_imm = pd.read_csv(\"imm_all_expression_condition.tsv\", sep=\"\\t\", index_col=0, header=0)\n", "df_cell_fib = pd.read_csv(\"fib_all_expression_condition.tsv\",sep=\"\\t\", index_col=0, header=0)\n", "df_cell_epi = pd.read_csv(\"epi_all_expression_condition.tsv\",sep=\"\\t\", index_col=0, header=0)\n", "df_cell_exp = df_cell_imm.join(df_cell_fib, how=\"outer\") #Keeping all genes\n", "df_cell_exp = df_cell_exp.join(df_cell_epi, how=\"outer\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RNA.CD8..IELs_HealthyRNA.CD8..LP_HealthyRNA.CD4..Memory_HealthyRNA.MT.hi_HealthyRNA.Cycling.T_HealthyRNA.NKs_HealthyRNA.CD4..Activated.Fos.lo_HealthyRNA.CD4..Activated.Fos.hi_HealthyRNA.CD8..IELs_UninflamedRNA.MT.hi_Uninflamed...RNA.Immature.Goblet_InflamedRNA.Stem_UninflamedRNA.Immature.Enterocytes.2_InflamedRNA.Goblet_InflamedRNA.Tuft_InflamedRNA.Enterocytes_InflamedRNA.Best4..Enterocytes_InflamedRNA.Enteroendocrine_InflamedRNA.M.cells_InflamedRNA.M.cells_Uninflamed
7SK0.0301820.0421580.0167920.0000000.0000000.2289910.0000000.0150920.0305730.000000...0.000000.0016790.0000000.0000007.0051920.0024650.0079130.0000000.0000000.000000
A1BG0.0624240.0258480.0494980.2084310.3219390.0000000.0451580.0185310.0530370.000000...0.000000.0132240.0024730.0892650.0000000.0000000.0000000.0000000.0000000.000000
A1BG-AS10.2769950.1985920.1384060.2950420.0000000.4987470.1301860.2561560.3054820.000000...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
A1CF0.0029780.0056690.0000000.0000000.0258400.0000000.0000000.0000000.0791500.000000...0.661230.2111920.8485890.1874660.0000000.4862240.7785872.6536540.4572930.737544
A2M0.0235450.2144420.2026090.1679210.0977670.5456770.1448940.1669560.0103040.243075...0.000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
\n", "

5 rows × 153 columns

\n", "
" ], "text/plain": [ " RNA.CD8..IELs_Healthy RNA.CD8..LP_Healthy RNA.CD4..Memory_Healthy \\\n", "7SK 0.030182 0.042158 0.016792 \n", "A1BG 0.062424 0.025848 0.049498 \n", "A1BG-AS1 0.276995 0.198592 0.138406 \n", "A1CF 0.002978 0.005669 0.000000 \n", "A2M 0.023545 0.214442 0.202609 \n", "\n", " RNA.MT.hi_Healthy RNA.Cycling.T_Healthy RNA.NKs_Healthy \\\n", "7SK 0.000000 0.000000 0.228991 \n", "A1BG 0.208431 0.321939 0.000000 \n", "A1BG-AS1 0.295042 0.000000 0.498747 \n", "A1CF 0.000000 0.025840 0.000000 \n", "A2M 0.167921 0.097767 0.545677 \n", "\n", " RNA.CD4..Activated.Fos.lo_Healthy \\\n", "7SK 0.000000 \n", "A1BG 0.045158 \n", "A1BG-AS1 0.130186 \n", "A1CF 0.000000 \n", "A2M 0.144894 \n", "\n", " RNA.CD4..Activated.Fos.hi_Healthy RNA.CD8..IELs_Uninflamed \\\n", "7SK 0.015092 0.030573 \n", "A1BG 0.018531 0.053037 \n", "A1BG-AS1 0.256156 0.305482 \n", "A1CF 0.000000 0.079150 \n", "A2M 0.166956 0.010304 \n", "\n", " RNA.MT.hi_Uninflamed ... RNA.Immature.Goblet_Inflamed \\\n", "7SK 0.000000 ... 0.00000 \n", "A1BG 0.000000 ... 0.00000 \n", "A1BG-AS1 0.000000 ... NaN \n", "A1CF 0.000000 ... 0.66123 \n", "A2M 0.243075 ... 0.00000 \n", "\n", " RNA.Stem_Uninflamed RNA.Immature.Enterocytes.2_Inflamed \\\n", "7SK 0.001679 0.000000 \n", "A1BG 0.013224 0.002473 \n", "A1BG-AS1 NaN NaN \n", "A1CF 0.211192 0.848589 \n", "A2M 0.000000 0.000000 \n", "\n", " RNA.Goblet_Inflamed RNA.Tuft_Inflamed RNA.Enterocytes_Inflamed \\\n", "7SK 0.000000 7.005192 0.002465 \n", "A1BG 0.089265 0.000000 0.000000 \n", "A1BG-AS1 NaN NaN NaN \n", "A1CF 0.187466 0.000000 0.486224 \n", "A2M 0.000000 0.000000 0.000000 \n", "\n", " RNA.Best4..Enterocytes_Inflamed RNA.Enteroendocrine_Inflamed \\\n", "7SK 0.007913 0.000000 \n", "A1BG 0.000000 0.000000 \n", "A1BG-AS1 NaN NaN \n", "A1CF 0.778587 2.653654 \n", "A2M 0.000000 0.000000 \n", "\n", " RNA.M.cells_Inflamed RNA.M.cells_Uninflamed \n", "7SK 0.000000 0.000000 \n", "A1BG 0.000000 0.000000 \n", "A1BG-AS1 NaN NaN \n", "A1CF 0.457293 0.737544 \n", "A2M 0.000000 0.000000 \n", "\n", "[5 rows x 153 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_cell_exp.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will choose the mean -2SD of the whole data set. First we do a log2 based transformation." ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "df_cell_exp[df_cell_exp.values == 0] = \"NaN\"" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RNA.CD8..IELs_HealthyRNA.CD8..LP_HealthyRNA.CD4..Memory_HealthyRNA.MT.hi_HealthyRNA.Cycling.T_HealthyRNA.NKs_HealthyRNA.CD4..Activated.Fos.lo_HealthyRNA.CD4..Activated.Fos.hi_HealthyRNA.CD8..IELs_UninflamedRNA.MT.hi_Uninflamed...RNA.Immature.Goblet_InflamedRNA.Stem_UninflamedRNA.Immature.Enterocytes.2_InflamedRNA.Goblet_InflamedRNA.Tuft_InflamedRNA.Enterocytes_InflamedRNA.Best4..Enterocytes_InflamedRNA.Enteroendocrine_InflamedRNA.M.cells_InflamedRNA.M.cells_Uninflamed
7SK0.03018190.04215780.0167924NaNNaN0.228991NaN0.01509180.0305725NaN...NaN0.00167886NaNNaN7.005190.002464880.00791332NaNNaNNaN
A1BG0.0624240.02584810.04949750.2084310.321939NaN0.04515780.01853140.0530365NaN...NaN0.01322390.00247320.089265NaNNaNNaNNaNNaNNaN
A1BG-AS10.2769950.1985920.1384060.295042NaN0.4987470.1301860.2561560.305482NaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
A1CF0.002977620.00566872NaNNaN0.0258403NaNNaNNaN0.0791505NaN...0.661230.2111920.8485890.187466NaN0.4862240.7785872.653650.4572930.737544
A2M0.02354520.2144420.2026090.1679210.09776670.5456770.1448940.1669560.01030370.243075...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

5 rows × 153 columns

\n", "
" ], "text/plain": [ " RNA.CD8..IELs_Healthy RNA.CD8..LP_Healthy RNA.CD4..Memory_Healthy \\\n", "7SK 0.0301819 0.0421578 0.0167924 \n", "A1BG 0.062424 0.0258481 0.0494975 \n", "A1BG-AS1 0.276995 0.198592 0.138406 \n", "A1CF 0.00297762 0.00566872 NaN \n", "A2M 0.0235452 0.214442 0.202609 \n", "\n", " RNA.MT.hi_Healthy RNA.Cycling.T_Healthy RNA.NKs_Healthy \\\n", "7SK NaN NaN 0.228991 \n", "A1BG 0.208431 0.321939 NaN \n", "A1BG-AS1 0.295042 NaN 0.498747 \n", "A1CF NaN 0.0258403 NaN \n", "A2M 0.167921 0.0977667 0.545677 \n", "\n", " RNA.CD4..Activated.Fos.lo_Healthy RNA.CD4..Activated.Fos.hi_Healthy \\\n", "7SK NaN 0.0150918 \n", "A1BG 0.0451578 0.0185314 \n", "A1BG-AS1 0.130186 0.256156 \n", "A1CF NaN NaN \n", "A2M 0.144894 0.166956 \n", "\n", " RNA.CD8..IELs_Uninflamed RNA.MT.hi_Uninflamed ... \\\n", "7SK 0.0305725 NaN ... \n", "A1BG 0.0530365 NaN ... \n", "A1BG-AS1 0.305482 NaN ... \n", "A1CF 0.0791505 NaN ... \n", "A2M 0.0103037 0.243075 ... \n", "\n", " RNA.Immature.Goblet_Inflamed RNA.Stem_Uninflamed \\\n", "7SK NaN 0.00167886 \n", "A1BG NaN 0.0132239 \n", "A1BG-AS1 NaN NaN \n", "A1CF 0.66123 0.211192 \n", "A2M NaN NaN \n", "\n", " RNA.Immature.Enterocytes.2_Inflamed RNA.Goblet_Inflamed \\\n", "7SK NaN NaN \n", "A1BG 0.0024732 0.089265 \n", "A1BG-AS1 NaN NaN \n", "A1CF 0.848589 0.187466 \n", "A2M NaN NaN \n", "\n", " RNA.Tuft_Inflamed RNA.Enterocytes_Inflamed \\\n", "7SK 7.00519 0.00246488 \n", "A1BG NaN NaN \n", "A1BG-AS1 NaN NaN \n", "A1CF NaN 0.486224 \n", "A2M NaN NaN \n", "\n", " RNA.Best4..Enterocytes_Inflamed RNA.Enteroendocrine_Inflamed \\\n", "7SK 0.00791332 NaN \n", "A1BG NaN NaN \n", "A1BG-AS1 NaN NaN \n", "A1CF 0.778587 2.65365 \n", "A2M NaN NaN \n", "\n", " RNA.M.cells_Inflamed RNA.M.cells_Uninflamed \n", "7SK NaN NaN \n", "A1BG NaN NaN \n", "A1BG-AS1 NaN NaN \n", "A1CF 0.457293 0.737544 \n", "A2M NaN NaN \n", "\n", "[5 rows x 153 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_cell_exp.head()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "data_matrix = df_cell_exp.to_numpy()\n", "data_matrix_log2 = np.log2(data_matrix.astype(float)) #We need to change the data file to floats\n", "mean_cell = np.nanmean(data_matrix_log2)\n", "std = np.nanstd(data_matrix_log2)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(-2.160708171397226, 3.00937762482671)" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mean_cell, std" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "import seaborn as sns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can check the distribuiton of the cell types. Here only for one." ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[-5.0501723 , -4.56805614, -5.89604351, ..., nan,\n", " nan, nan],\n", " [-4.00175632, -5.27379813, -4.33649916, ..., nan,\n", " nan, nan],\n", " [-1.85206773, -2.33211901, -2.8530207 , ..., nan,\n", " nan, nan],\n", " ...,\n", " [-3.47290816, -4.19215007, -4.40580567, ..., -2.06536459,\n", " 0.35303725, 0.44592408],\n", " [-4.38586399, -2.27399124, -3.85313444, ..., -4.58601467,\n", " -8.31527547, nan],\n", " [-2.5735855 , -1.76640068, -1.1066736 , ..., -4.14328163,\n", " -7.335609 , nan]])" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_matrix_log2" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAFgCAYAAACFYaNMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAXKklEQVR4nO3df6zdd33f8ee7TgyMGjsxnu1eB9kGq11gIk5cHAqq2mRUTrbO6dSmoIpYyJ2nzVh0TO3S7Q9WqZNotS2BKAryCMOZKDTLiOyiKDQ1PyeNDKfHCgfSKreWk/jiX4HgpmTZxfF7f9zPvT45vrGP7fs9n/Pj+ZCu7vf7Od9779vX4cXX7/P5fj6RmUiS+u+nahcgSePKAJakSgxgSarEAJakSgxgSarkitoFXI4tW7bko48+WrsMSbqQmG9wqO+An3/++dolSNIlG+oAlqRhZgBLUiUGsCRVYgBLUiUGsCRVYgBLUiUGsCRVYgBLUiUGsCRVYgBLUiUGsCRVYgBLUiUGsCRVMtTLUWqwTE9P02q1XjW2ceNGFi9eXKkiabAZwFowrVaLXffuZenEegBOTR3inp2wefPmypVJg8kA1oJaOrGe5eveDsCZV07TbrfPuca7YmmGAazGvHj8We4+/DKrJnNuzLti6SwDWJesu+fbbrfJM/mqa5asWjt3Ryzp1QxgXbLunu/UwW+ybMMNlauShocBrMvS2fM99f1DlauRhovzgCWpEgNYkiqxBaGe9fKmm6TeGcDqmW+6SQvLANZF8U03aeHYA5akSgxgSarEAJakSgxgSarEAJakSgxgSarEAJakShoN4IhYFhEPRcRfRcRTEfHuiLg6Ih6LiKfL56vKtRERn4yIyYh4MiKub7I2Saqt6TvgTwCPZubPAe8EngLuBPZn5gZgfzkHuAXYUD52APc1XJskVdVYAEfEUuAXgfsBMnM6M38EbAX2lMv2ALeV463AAznjW8CyiFjdVH2SVFuTjyKvA04C/y0i3gk8AXwEWJmZR8s1x4CV5XgCeK7j64+UsaNoZMy3T5x7xGlcNRnAVwDXA7sy8/GI+ARn2w0AZGZGxEUtpxURO5hpUfCWt7xloWpVn3TvE+cecRpnTfaAjwBHMvPxcv4QM4F8fLa1UD6fKK9PAdd0fP2aMvYqmbk7Mzdl5qYVK1Y0VryaM7tP3PJ1b59bWU0aR40FcGYeA56LiJ8tQzcD3wP2AdvK2DZgbzneB9xRZkPcCJzqaFVI0shpejnKXcDnImIxcAj4EDOh/2BEbAeeAW4v1z4C3ApMAi+VayVpZDUawJl5ENg0z0s3z3NtAjubrEeSBolPwklSJQawJFViAEtSJQawJFViAEtSJQawJFViAEtSJQawJFViAEtSJQawJFXS9FoQ0nm5PrDGmQGsqlwfWOPMANZrmp6eptVqzZ23223yzEWtn9+T2fWBpXFjAOs1tVotdt27d27R9KmD32TZhhsqVyWNDgNY57V0Yv3c3emp7x+qXI00WpwFIUmVGMCSVIkBLEmVGMCSVIkBLEmVGMCSVInT0MZE90MVMJiP/PpossaJATwmuh+qGNRHfn00WePEAB4jnQ9VDDIfTda4sAcsSZUYwJJUiQEsSZXYAx5T8802AGccSP1kAI+p7tkG4IwDqd8M4DHWPdug+664qQXYJc0wgDWn+67YBdilZhnAepXOu2IXYJea5SwISarEAJakSgxgSarEAJakSgxgSarEAJakSgxgSaqk0QCOiMMR8Z2IOBgRB8rY1RHxWEQ8XT5fVcYjIj4ZEZMR8WREXN9kbZJUWz/ugH85M6/LzE3l/E5gf2ZuAPaXc4BbgA3lYwdwXx9qk6RqarQgtgJ7yvEe4LaO8QdyxreAZRGxukJ9ktQXTQdwAn8eEU9ExI4ytjIzj5bjY8DKcjwBPNfxtUfK2KtExI6IOBARB06ePNlU3ZLUuKbXgnhvZk5FxN8HHouIv+p8MTMzIi5qua3M3A3sBti0aZNLdUkaWo3eAWfmVPl8AngYeBdwfLa1UD6fKJdPAdd0fPmaMiZJI6mxAI6IN0bEktlj4FeANrAP2FYu2wbsLcf7gDvKbIgbgVMdrQpJGjlNtiBWAg9HxOzP+ZPMfDQivg08GBHbgWeA28v1jwC3ApPAS8CHGqxNkqprLIAz8xDwznnGfwDcPM94AjubqkeSBo1PwklSJe6IMaKmp6dptVpz5+7vJg0eA3hEtVotdt27l6UT6wH3d5MGkQE8wpZOrHd/N2mA2QOWpEoMYEmqxACWpEoMYEmqxACWpEoMYEmqxACWpEoMYEmqxACWpEoMYEmqxACWpEoMYEmqxMV4hlT3cpMAGzduZPHixZUqknSxDOAh1b3c5KmpQ9yzEzZv3ly5Mkm9MoCHWOdyk5KGjz1gSarEAJakSmxBjIgzr5ym3W7PnbsHnDT4DOAR8eLxZ7n78MusmpwJXfeAkwafATxClqxa6x5w0hAxgDXQulsr4HxnjQ4DWAOtu7XifGeNEgNYA6+ztSKNEqehSVIlBrAkVWIAS1Il9oA1VOabFQHOjNBwMoA1VLpnRYAzIzS8DGANHWdFaFTYA5akSgxgSarEAJakSgxgSarEAJakShoP4IhYFBGtiPhSOV8XEY9HxGRE/GlELC7jryvnk+X1tU3XJkk19eMO+CPAUx3nfwTclZlvA14Atpfx7cALZfyucp0kjaxGAzgi1gD/GPh0OQ/gJuChcske4LZyvLWcU16/uVwvSSOp6Tvgu4HfA86U8+XAjzLzdDk/AkyU4wngOYDy+qly/atExI6IOBARB06ePNlg6ZLUrMYCOCL+CXAiM59YyO+bmbszc1NmblqxYsVCfmtJ6qsmH0V+D/BPI+JW4PXAm4BPAMsi4opyl7sGmCrXTwHXAEci4gpgKfCDBuuTpKoauwPOzN/PzDWZuRZ4P/CVzPwt4KvAr5fLtgF7y/G+ck55/SuZ6b7qkkZWjXnA/xb4aERMMtPjvb+M3w8sL+MfBe6sUJsk9U1fVkPLzK8BXyvHh4B3zXPNy8Bv9KOeQTc9PU2r1Tpn3DVvpdHicpQDqNVqsevevSydWD835pq30ugxgAfU0on1rnkrjTjXgpCkSgxgSarEAJakSgxgSarEAJakSpwFMSTOvHKadrs9d95ut8kzPigI5/5uwDnTGg4G8JB48fiz3H34ZVZNzoTu1MFvsmzDDZWrGgzdvxvnTGtYGMBDZMmqtXNzg099/1DlagZL5+9GGhb2gCWpkp4COCLe08uYJKl3vd4B39PjmCSpR+ftAUfEu4FfAFZExEc7XnoTsKjJwiRp1F3oTbjFwE+X65Z0jP8tZxdVlyRdgvMGcGZ+Hfh6RHw2M5/pU02SNBZ6nYb2uojYDazt/JrMvKmJoiRpHPQawP8D+BTwaeCV5sqRpPHRawCfzsz7Gq1EksZMr9PQ/iwi/lVErI6Iq2c/Gq1MkkZcr3fAs9vF/27HWALr57lWktSDngI4M9c1XYgkjZueAjgi7phvPDMfWNhyJGl89NqC+PmO49cDNwN/CRjAknSJem1B7Oo8j4hlwBeaKEiSxsWlLkf5Y8C+sCRdhl57wH/GzKwHmFmE5x8ADzZV1LiZnp6m1WrNnbvdkDQeeu0B/6eO49PAM5l5pIF6xlKr1WLXvXtZOjEzq8/thqTx0GsP+OsRsZKzb8Y93VxJ42npxHq3G5LGTK87YtwO/B/gN4DbgccjwuUoJeky9NqC+PfAz2fmCYCIWAH8BfBQU4WNMnu+zXKbeg2LXgP4p2bDt/gBbuh5yez5Nstt6jUseg3gRyPiy8Dny/lvAo80U9J4sOfbLLep1zC40J5wbwNWZubvRsQ/A95bXvrfwOeaLk6SRtmF7oDvBn4fIDO/CHwRICL+YXntVxusTZJG2oX6uCsz8zvdg2VsbSMVSdKYuFAALzvPa29YwDokaexcKIAPRMQ/7x6MiN8GnmimJEkaDxfqAf8O8HBE/BZnA3cTsBj4tfN9YUS8HvgG8Lrycx7KzI9FxDpmVlJbXr7nBzNzOiJex8zyljcwM83tNzPz8KX8oSRpGJz3Djgzj2fmLwB/ABwuH3+Qme/OzGMX+N7/D7gpM98JXAdsiYgbgT8C7srMtwEvANvL9duBF8r4XeU6SRpZPT1MkZlfzcx7ysdXevyazMy/K6dXlo8EbuLsE3R7gNvK8dZyTnn95oiIXn6WJA2jRp9mi4hFEXEQOAE8BvwN8KPMPF0uOQJMlOMJ4DmA8vopZtoUkjSSGg3gzHwlM68D1gDvAn7ucr9nROyIiAMRceDkyZOX++0kqZq+rOeQmT8Cvgq8G1gWEbNv/q0BpsrxFHANQHl9KTNvxnV/r92ZuSkzN61YsaLp0iWpMY0FcESsKHvHERFvAN4HPMVMEM8uZbkN2FuO95VzyutfyUyXCJM0snpdjOdSrAb2RMQiZoL+wcz8UkR8D/hCRPwh0ALuL9ffD/z3iJgEfgi8v8HaJKm6xgI4M58ENs4zfoiZfnD3+MvMLPguSWPBNX0lqRIDWJIqabIHrMItiCTNxwDuA7cgkjQfA7hP3IJIUjd7wJJUiQEsSZUYwJJUiQEsSZUYwJJUiQEsSZU4DW0BdD9oAbBx40YWL15cqSJJw8AAXgDdD1qcmjrEPTth8+bNlSuTNMgM4AXS+aCFJPXCHrAkVWIAS1IlBrAkVWIPWCPvzCunabfbrxpzlooGgQGskffi8We5+/DLrJqcWYPZWSoaFAZwA7rvuFyAvb4lq9Y6S0UDxwBuQPcdlwuwDxZbEhoUBnBDOu+4XIB9sNiS0KAwgDWWbEloEDgNTZIqMYAlqRJbEBdpvpXPnOUg6VIYwBepe+UzcJaDpEtjAF+C7pXPnOUg6VLYA5akSgxgSarEAJakSgxgSarEAJakSgxgSarEAJakSgxgSarEAJakSgxgSarEAJakShoL4Ii4JiK+GhHfi4jvRsRHyvjVEfFYRDxdPl9VxiMiPhkRkxHxZERc31RtkjQImrwDPg38m8y8FrgR2BkR1wJ3AvszcwOwv5wD3AJsKB87gPsarE2SqmssgDPzaGb+ZTl+EXgKmAC2AnvKZXuA28rxVuCBnPEtYFlErG6qPkmqrS894IhYC2wEHgdWZubR8tIxYGU5ngCe6/iyI2Ws+3vtiIgDEXHg5MmTzRUtSQ1rfD3giPhp4H8Cv5OZfxsRc69lZkbERW0lkZm7gd0AmzZtchsKXTa3qVctjQZwRFzJTPh+LjO/WIaPR8TqzDxaWgwnyvgUcE3Hl68pY1Kj3KZetTQ5CyKA+4GnMvO/dLy0D9hWjrcBezvG7yizIW4ETnW0KqRGzW5Tv3zd21+13ZTUpCbvgN8DfBD4TkQcLGP/Dvg48GBEbAeeAW4vrz0C3ApMAi8BH2qwNkmqrrEAzsz/BcRrvHzzPNcnsLOpeiRp0PgknCRVYgBLUiUGsCRVYgBLUiUGsCRVYgBLUiUGsCRVYgBLUiUGsCRVYgBLUiUGsCRVYgBLUiUGsCRV0viOGMNuenqaVqs1d95ut8kzbsQh6fIZwBfQarXYde/euUW6pw5+k2UbbqhclaRRYAD3YOnEepavezsAp75/qHI1kkaFPWBJqsQAlqRKDGBJqsQAlqRKfBNO6nLmldO02+1zxjdu3MjixYsrVKRRZQBLXV48/ix3H36ZVZNn53ufmjrEPTth8+bNFSvTqDGApXksWbV2buqh1BR7wJJUiQEsSZUYwJJUiQEsSZX4JpzUg/mmpjktTZfLAJZ60D01zWlpWggGsNQjp6ZpodkDlqRKDGBJqsQWRBe3IJLULwZwF7cgktQvBvA83IJIUj/YA5akSgxgSarEAJakShoL4Ij4TESciIh2x9jVEfFYRDxdPl9VxiMiPhkRkxHxZERc31RdkjQomrwD/iywpWvsTmB/Zm4A9pdzgFuADeVjB3Bfg3VJ0kBoLIAz8xvAD7uGtwJ7yvEe4LaO8QdyxreAZRGxuqnaJGkQ9LsHvDIzj5bjY8DKcjwBPNdx3ZEydo6I2BERByLiwMmTJ5urVJIaVu1NuMxM4KIfMcvM3Zm5KTM3rVixooHKJKk/+h3Ax2dbC+XziTI+BVzTcd2aMiZJI6vfAbwP2FaOtwF7O8bvKLMhbgROdbQqJGkkNfYockR8Hvgl4M0RcQT4GPBx4MGI2A48A9xeLn8EuBWYBF4CPtRUXZI0KBoL4Mz8wGu8dPM81yaws6laJGkQ+SScJFViAEtSJS5HKS2A7oX8wV2TdWEGsHQJurepb7fbfOprkyxb81bAXZPVGwNYugTd29TP7pzirsm6GAawdIk6t6l35xRdCt+Ek6RKDGBJqsQAlqRKDGBJqsQAlqRKDGBJqsQAlqRKnAcsNaD7STnw0WSdywCWGtD9pJyPJms+BrDUkM4n5aT52AOWpEq8A5b6wJ6w5mMAS31gT1jzMYClPrEnrG72gCWpEgNYkioZ+xZE915e7XabPJMVK5I0LsY+gFutFrvu3cvSifXA2a1lpCbNNysCnBkxbsY+gAGWTqx3axn1VfesCHBmxDgauwC25aBB4awIjV0A23KQNCjGLoDBloOkweA0NEmqxACWpErGsgUhDYPuN4zBaWqjxgCWBkT33OB2u82nvjbJsjVvBZymNooMYGlAdM8Nnp2hM/uGsUtajh4DWBognXODu2fodAf0C88+zb+8qc073vGOuWsuNpBtc9RlAEtDpDug7/7yd88byHD+QO2eF2+bo78MYGmInS+Q4dxAne9J0DetXtdzm2O+O+bua9Q7A1gaId2PN1/ojb3uJ0EvtHNH9x3zfNeodwawNMIu9MbefE+Cdob4fAHeecc83zU/+clPALjyyivnxs53hzzOfWgDWBpx53tj70JeK8AvdM2iJctZ9dZrZ35mD22QcZ1uN1ABHBFbgE8Ai4BPZ+bHK5ckjb1eArz7miuWrjrvXfR8bZDz9aFhNO+KByaAI2IRcC/wPuAI8O2I2JeZ36tbmaTLcbFtkPnWSr7YKXfdd9nztUW6v0cvX7PQ/ycwMAEMvAuYzMxDABHxBWArsOABfGrq7F/4352YYtH/fZkfvPHvXdL5QnwPf8Zgfc9R+RkDVfeS5XR68djhi7r+pR8e4w/3THLVz8zcGf/4+aP83gfed86Uu1ntdps//vxjvPHNqwF4/m/aLHrDm7jqZ94yd03397jQ1/z4+aM88B//9YK2RiJzMBYjj4hfB7Zk5m+X8w8CmzPzw13X7QB2lNOfBf66r4XW82bg+dpFVDbuv4Nx//PD8P4Ons/MLd2Dg3QH3JPM3A3srl1Hv0XEgczcVLuOmsb9dzDuf34Yvd/BIC1HOQVc03G+poxJ0kgapAD+NrAhItZFxGLg/cC+yjVJUmMGpgWRmacj4sPAl5mZhvaZzPxu5bIGydi1XeYx7r+Dcf/zw4j9DgbmTThJGjeD1IKQpLFiAEtSJQbwkIiI/xARUxFxsHzcWrumfomILRHx1xExGRF31q6nhog4HBHfKX/3B2rX0w8R8ZmIOBER7Y6xqyPisYh4uny+qmaNl8sAHi53ZeZ15eOR2sX0Q8cj6rcA1wIfiIhr61ZVzS+Xv/uRmQd7AZ8Fuh9euBPYn5kbgP3lfGgZwBp0c4+oZ+Y0MPuIukZcZn4D+GHX8FZgTzneA9zWz5oWmgE8XD4cEU+Wf5oN9T+9LsIE8FzH+ZEyNm4S+POIeKI8jj+uVmbm0XJ8DFhZs5jLZQAPkIj4i4hoz/OxFbgPeCtwHXAU+M81a1XfvTczr2emFbMzIn6xdkG15cwc2qGeRzswD2IIMvMf9XJdRPxX4EsNlzMofEQdyMyp8vlERDzMTGvmG3WrquJ4RKzOzKMRsRo4Ubugy+Ed8JAo/7HN+jXg3BWrR9PYP6IeEW+MiCWzx8CvMD5//932AdvK8TZgb8VaLpt3wMPjjyPiOmb+yXUY+BdVq+kTH1EHZvqcD0cEzPxv9k8y89G6JTUvIj4P/BLw5og4AnwM+DjwYERsB54Bbq9X4eXzUWRJqsQWhCRVYgBLUiUGsCRVYgBLUiUGsCRVYgBLUiUGsCRV8v8BuwKOiAy9i44AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.displot(data_matrix_log2[:,0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "After checking the histogram we can say that we will use the mean minus 2 standard deviation of the expressed genes." ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\modos\\.conda\\envs\\Single_cell_RNA_seq\\lib\\site-packages\\ipykernel_launcher.py:1: RuntimeWarning: invalid value encountered in greater\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] }, { "data": { "text/plain": [ "array([[ True, True, True, ..., False, False, False],\n", " [ True, True, True, ..., False, False, False],\n", " [ True, True, True, ..., False, False, False],\n", " ...,\n", " [ True, True, True, ..., True, True, True],\n", " [ True, True, True, ..., True, False, False],\n", " [ True, True, True, ..., True, True, False]])" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_matrix_log2 > (mean_cell-2*std)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For simplicity we call \"NaN\" each number which is below the treshold." ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "df_cell_log2 = pd.DataFrame(data=data_matrix_log2, index=df_cell_exp.index ,columns=df_cell_exp.columns)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RNA.CD8..IELs_HealthyRNA.CD8..LP_HealthyRNA.CD4..Memory_HealthyRNA.MT.hi_HealthyRNA.Cycling.T_HealthyRNA.NKs_HealthyRNA.CD4..Activated.Fos.lo_HealthyRNA.CD4..Activated.Fos.hi_HealthyRNA.CD8..IELs_UninflamedRNA.MT.hi_Uninflamed...RNA.Immature.Goblet_InflamedRNA.Stem_UninflamedRNA.Immature.Enterocytes.2_InflamedRNA.Goblet_InflamedRNA.Tuft_InflamedRNA.Enterocytes_InflamedRNA.Best4..Enterocytes_InflamedRNA.Enteroendocrine_InflamedRNA.M.cells_InflamedRNA.M.cells_Uninflamed
7SK-5.050172-4.568056-5.896044NaNNaN-2.126639NaN-6.050093-5.031619NaN...NaN-9.218304NaNNaN2.808425-8.664267-6.981502NaNNaNNaN
A1BG-4.001756-5.273798-4.336499-2.262358-1.635142NaN-4.468881-5.753884-4.236870NaN...NaN-6.240706-8.659406-3.485761NaNNaNNaNNaNNaNNaN
A1BG-AS1-1.852068-2.332119-2.853021-1.761008NaN-1.003621-2.941349-1.964903-1.710843NaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
A1CF-8.391626-7.462761NaNNaN-5.274231NaNNaNNaN-3.659258NaN...-0.596775-2.243375-0.236862-2.415296NaN-1.040307-0.3610691.40798-1.12881-0.4392
A2M-5.408426-2.221338-2.303227-2.574148-3.354513-0.873881-2.786930-2.582460-6.600697-2.040528...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

5 rows × 153 columns

\n", "
" ], "text/plain": [ " RNA.CD8..IELs_Healthy RNA.CD8..LP_Healthy RNA.CD4..Memory_Healthy \\\n", "7SK -5.050172 -4.568056 -5.896044 \n", "A1BG -4.001756 -5.273798 -4.336499 \n", "A1BG-AS1 -1.852068 -2.332119 -2.853021 \n", "A1CF -8.391626 -7.462761 NaN \n", "A2M -5.408426 -2.221338 -2.303227 \n", "\n", " RNA.MT.hi_Healthy RNA.Cycling.T_Healthy RNA.NKs_Healthy \\\n", "7SK NaN NaN -2.126639 \n", "A1BG -2.262358 -1.635142 NaN \n", "A1BG-AS1 -1.761008 NaN -1.003621 \n", "A1CF NaN -5.274231 NaN \n", "A2M -2.574148 -3.354513 -0.873881 \n", "\n", " RNA.CD4..Activated.Fos.lo_Healthy \\\n", "7SK NaN \n", "A1BG -4.468881 \n", "A1BG-AS1 -2.941349 \n", "A1CF NaN \n", "A2M -2.786930 \n", "\n", " RNA.CD4..Activated.Fos.hi_Healthy RNA.CD8..IELs_Uninflamed \\\n", "7SK -6.050093 -5.031619 \n", "A1BG -5.753884 -4.236870 \n", "A1BG-AS1 -1.964903 -1.710843 \n", "A1CF NaN -3.659258 \n", "A2M -2.582460 -6.600697 \n", "\n", " RNA.MT.hi_Uninflamed ... RNA.Immature.Goblet_Inflamed \\\n", "7SK NaN ... NaN \n", "A1BG NaN ... NaN \n", "A1BG-AS1 NaN ... NaN \n", "A1CF NaN ... -0.596775 \n", "A2M -2.040528 ... NaN \n", "\n", " RNA.Stem_Uninflamed RNA.Immature.Enterocytes.2_Inflamed \\\n", "7SK -9.218304 NaN \n", "A1BG -6.240706 -8.659406 \n", "A1BG-AS1 NaN NaN \n", "A1CF -2.243375 -0.236862 \n", "A2M NaN NaN \n", "\n", " RNA.Goblet_Inflamed RNA.Tuft_Inflamed RNA.Enterocytes_Inflamed \\\n", "7SK NaN 2.808425 -8.664267 \n", "A1BG -3.485761 NaN NaN \n", "A1BG-AS1 NaN NaN NaN \n", "A1CF -2.415296 NaN -1.040307 \n", "A2M NaN NaN NaN \n", "\n", " RNA.Best4..Enterocytes_Inflamed RNA.Enteroendocrine_Inflamed \\\n", "7SK -6.981502 NaN \n", "A1BG NaN NaN \n", "A1BG-AS1 NaN NaN \n", "A1CF -0.361069 1.40798 \n", "A2M NaN NaN \n", "\n", " RNA.M.cells_Inflamed RNA.M.cells_Uninflamed \n", "7SK NaN NaN \n", "A1BG NaN NaN \n", "A1BG-AS1 NaN NaN \n", "A1CF -1.12881 -0.4392 \n", "A2M NaN NaN \n", "\n", "[5 rows x 153 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_cell_log2.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will call every genee which is expressed below the treshold with \"NaN\"." ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\modos\\.conda\\envs\\Single_cell_RNA_seq\\lib\\site-packages\\ipykernel_launcher.py:1: RuntimeWarning: invalid value encountered in less\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] } ], "source": [ "df_cell_log2[df_cell_log2.values < (mean_cell-2*std)] = \"NaN\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's see what we hjave done and the avaialbe cell types." ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RNA.CD8..IELs_HealthyRNA.CD8..LP_HealthyRNA.CD4..Memory_HealthyRNA.MT.hi_HealthyRNA.Cycling.T_HealthyRNA.NKs_HealthyRNA.CD4..Activated.Fos.lo_HealthyRNA.CD4..Activated.Fos.hi_HealthyRNA.CD8..IELs_UninflamedRNA.MT.hi_Uninflamed...RNA.Immature.Goblet_InflamedRNA.Stem_UninflamedRNA.Immature.Enterocytes.2_InflamedRNA.Goblet_InflamedRNA.Tuft_InflamedRNA.Enterocytes_InflamedRNA.Best4..Enterocytes_InflamedRNA.Enteroendocrine_InflamedRNA.M.cells_InflamedRNA.M.cells_Uninflamed
7SK-5.05017-4.56806-5.896044NaNNaN-2.126639NaN-6.05009-5.031619NaN...NaNNaNNaNNaN2.808425NaN-6.9815NaNNaNNaN
A1BG-4.00176-5.2738-4.336499-2.262358-1.635142NaN-4.46888-5.75388-4.236870NaN...NaN-6.24071NaN-3.48576NaNNaNNaNNaNNaNNaN
A1BG-AS1-1.85207-2.33212-2.853021-1.761008NaN-1.003621-2.94135-1.9649-1.710843NaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
A1CFNaN-7.46276NaNNaN-5.274231NaNNaNNaN-3.659258NaN...-0.596775-2.24338-0.236862-2.4153NaN-1.04031-0.3610691.40798-1.12881-0.4392
A2M-5.40843-2.22134-2.303227-2.574148-3.354513-0.873881-2.78693-2.58246-6.600697-2.040528...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

5 rows × 153 columns

\n", "
" ], "text/plain": [ " RNA.CD8..IELs_Healthy RNA.CD8..LP_Healthy RNA.CD4..Memory_Healthy \\\n", "7SK -5.05017 -4.56806 -5.896044 \n", "A1BG -4.00176 -5.2738 -4.336499 \n", "A1BG-AS1 -1.85207 -2.33212 -2.853021 \n", "A1CF NaN -7.46276 NaN \n", "A2M -5.40843 -2.22134 -2.303227 \n", "\n", " RNA.MT.hi_Healthy RNA.Cycling.T_Healthy RNA.NKs_Healthy \\\n", "7SK NaN NaN -2.126639 \n", "A1BG -2.262358 -1.635142 NaN \n", "A1BG-AS1 -1.761008 NaN -1.003621 \n", "A1CF NaN -5.274231 NaN \n", "A2M -2.574148 -3.354513 -0.873881 \n", "\n", " RNA.CD4..Activated.Fos.lo_Healthy RNA.CD4..Activated.Fos.hi_Healthy \\\n", "7SK NaN -6.05009 \n", "A1BG -4.46888 -5.75388 \n", "A1BG-AS1 -2.94135 -1.9649 \n", "A1CF NaN NaN \n", "A2M -2.78693 -2.58246 \n", "\n", " RNA.CD8..IELs_Uninflamed RNA.MT.hi_Uninflamed ... \\\n", "7SK -5.031619 NaN ... \n", "A1BG -4.236870 NaN ... \n", "A1BG-AS1 -1.710843 NaN ... \n", "A1CF -3.659258 NaN ... \n", "A2M -6.600697 -2.040528 ... \n", "\n", " RNA.Immature.Goblet_Inflamed RNA.Stem_Uninflamed \\\n", "7SK NaN NaN \n", "A1BG NaN -6.24071 \n", "A1BG-AS1 NaN NaN \n", "A1CF -0.596775 -2.24338 \n", "A2M NaN NaN \n", "\n", " RNA.Immature.Enterocytes.2_Inflamed RNA.Goblet_Inflamed \\\n", "7SK NaN NaN \n", "A1BG NaN -3.48576 \n", "A1BG-AS1 NaN NaN \n", "A1CF -0.236862 -2.4153 \n", "A2M NaN NaN \n", "\n", " RNA.Tuft_Inflamed RNA.Enterocytes_Inflamed \\\n", "7SK 2.808425 NaN \n", "A1BG NaN NaN \n", "A1BG-AS1 NaN NaN \n", "A1CF NaN -1.04031 \n", "A2M NaN NaN \n", "\n", " RNA.Best4..Enterocytes_Inflamed RNA.Enteroendocrine_Inflamed \\\n", "7SK -6.9815 NaN \n", "A1BG NaN NaN \n", "A1BG-AS1 NaN NaN \n", "A1CF -0.361069 1.40798 \n", "A2M NaN NaN \n", "\n", " RNA.M.cells_Inflamed RNA.M.cells_Uninflamed \n", "7SK NaN NaN \n", "A1BG NaN NaN \n", "A1BG-AS1 NaN NaN \n", "A1CF -1.12881 -0.4392 \n", "A2M NaN NaN \n", "\n", "[5 rows x 153 columns]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_cell_log2.head()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'RNA.Macrophages_Uninflamed', 'RNA.CD4..PD1._Inflamed', 'RNA.ILCs_Inflamed', 'RNA.Cycling.TA_Healthy', 'RNA.TA.1_Inflamed', 'RNA.WNT2B..Fos.lo.1_Uninflamed', 'RNA.Tregs_Healthy', 'RNA.Best4..Enterocytes_Inflamed', 'RNA.Enterocyte.Progenitors_Inflamed', 'RNA.Microvascular_Inflamed', 'RNA.Goblet_Inflamed', 'RNA.CD4..Activated.Fos.hi_Healthy', 'RNA.CD8..IELs_Inflamed', 'RNA.WNT5B..2_Uninflamed', 'RNA.Inflammatory.Fibroblasts_Inflamed', 'RNA.CD8..IELs_Healthy', 'RNA.DC2_Inflamed', 'RNA.Tregs_Uninflamed', 'RNA.Immature.Enterocytes.1_Inflamed', 'RNA.WNT5B..2_Healthy', 'RNA.Follicular_Uninflamed', 'RNA.Inflammatory.Monocytes_Uninflamed', 'RNA.Microvascular_Uninflamed', 'RNA.Plasma_Uninflamed', 'RNA.CD8..IL17._Inflamed', 'RNA.CD8..IELs_Uninflamed', 'RNA.DC1_Inflamed', 'RNA.M.cells_Uninflamed', 'RNA.WNT5B..1_Inflamed', 'RNA.WNT2B..Fos.lo.2_Uninflamed', 'RNA.CD4..Activated.Fos.lo_Uninflamed', 'RNA.Cycling.Monocytes_Inflamed', 'RNA.NKs_Healthy', 'RNA.Secretory.TA_Inflamed', 'RNA.GC_Healthy', 'RNA.Cycling.B_Inflamed', 'RNA.Inflammatory.Monocytes_Healthy', 'RNA.MT.hi_Inflamed', 'RNA.Post.capillary.Venules_Inflamed', 'RNA.Cycling.T_Healthy', 'RNA.WNT2B..Fos.hi_Inflamed', 'RNA.RSPO3._Inflamed', 'RNA.Cycling.TA_Uninflamed', 'RNA.Enterocyte.Progenitors_Healthy', 'RNA.CD69..Mast_Inflamed', 'RNA.TA.2_Inflamed', 'RNA.TA.2_Healthy', 'RNA.DC1_Uninflamed', 'RNA.Endothelial_Inflamed', 'RNA.WNT5B..2_Inflamed', 'RNA.Pericytes_Healthy', 'RNA.Inflammatory.Monocytes_Inflamed', 'RNA.Inflammatory.Fibroblasts_Uninflamed', 'RNA.Microvascular_Healthy', 'RNA.M.cells_Inflamed', 'RNA.Glia_Healthy', 'RNA.MT.hi_Uninflamed', 'RNA.DC2_Healthy', 'RNA.Macrophages_Healthy', 'RNA.Macrophages_Inflamed', 'RNA.CD4..PD1._Healthy', 'RNA.Cycling.T_Uninflamed', 'RNA.CD69..Mast_Healthy', 'RNA.Enteroendocrine_Inflamed', 'RNA.Tregs_Inflamed', 'RNA.Secretory.TA_Healthy', 'RNA.CD4..Activated.Fos.hi_Uninflamed', 'RNA.Follicular_Inflamed', 'RNA.Plasma_Healthy', 'RNA.Immature.Enterocytes.1_Uninflamed', 'RNA.Glia_Inflamed', 'RNA.Best4..Enterocytes_Healthy', 'RNA.Best4..Enterocytes_Uninflamed', 'RNA.Stem_Inflamed', 'RNA.Tuft_Inflamed', 'RNA.Enterocytes_Uninflamed', 'RNA.Goblet_Uninflamed', 'RNA.CD4..Memory_Healthy', 'RNA.RSPO3._Uninflamed', 'RNA.Immature.Goblet_Inflamed', 'RNA.Enterocytes_Healthy', 'RNA.CD4..Activated.Fos.hi_Inflamed', 'RNA.MT.hi_Healthy', 'RNA.CD8..IL17._Healthy', 'RNA.WNT2B..Fos.lo.2_Inflamed', 'RNA.Goblet_Healthy', 'RNA.Cycling.B_Uninflamed', 'RNA.Cycling.Monocytes_Healthy', 'RNA.Pericytes_Inflamed', 'RNA.Immature.Enterocytes.2_Uninflamed', 'RNA.ILCs_Uninflamed', 'RNA.WNT2B..Fos.lo.1_Inflamed', 'RNA.Myofibroblasts_Healthy', 'RNA.CD4..Activated.Fos.lo_Healthy', 'RNA.Immature.Enterocytes.2_Healthy', 'RNA.WNT2B..Fos.lo.2_Healthy', 'RNA.M.cells_Healthy', 'RNA.CD8..IL17._Uninflamed', 'RNA.WNT2B..Fos.hi_Uninflamed', 'RNA.DC2_Uninflamed', 'RNA.TA.1_Uninflamed', 'RNA.Cycling.TA_Inflamed', 'RNA.Immature.Goblet_Healthy', 'RNA.Secretory.TA_Uninflamed', 'RNA.CD4..Memory_Uninflamed', 'RNA.Enteroendocrine_Uninflamed', 'RNA.Tuft_Uninflamed', 'RNA.Plasma_Inflamed', 'RNA.CD8..LP_Healthy', 'RNA.Stem_Uninflamed', 'RNA.Glia_Uninflamed', 'RNA.Follicular_Healthy', 'RNA.NKs_Uninflamed', 'RNA.DC1_Healthy', 'RNA.WNT5B..1_Uninflamed', 'RNA.CD4..Activated.Fos.lo_Inflamed', 'RNA.CD69..Mast_Healthy.1', 'RNA.NKs_Inflamed', 'RNA.Stem_Healthy', 'RNA.Inflammatory.Fibroblasts_Healthy', 'RNA.CD8..LP_Inflamed', 'RNA.CD69..Mast_Uninflamed.1', 'RNA.GC_Inflamed', 'RNA.Enterocyte.Progenitors_Uninflamed', 'RNA.GC_Uninflamed', 'RNA.Enterocytes_Inflamed', 'RNA.CD69..Mast_Inflamed.1', 'RNA.WNT2B..Fos.lo.1_Healthy', 'RNA.ILCs_Healthy', 'RNA.Post.capillary.Venules_Healthy', 'RNA.CD4..Memory_Inflamed', 'RNA.Myofibroblasts_Inflamed', 'RNA.WNT2B..Fos.hi_Healthy', 'RNA.WNT5B..1_Healthy', 'RNA.RSPO3._Healthy', 'RNA.Cycling.B_Healthy', 'RNA.CD8..LP_Uninflamed', 'RNA.Enteroendocrine_Healthy', 'RNA.TA.1_Healthy', 'RNA.Tuft_Healthy', 'RNA.Post.capillary.Venules_Uninflamed', 'RNA.Myofibroblasts_Uninflamed', 'RNA.Immature.Enterocytes.2_Inflamed', 'RNA.TA.2_Uninflamed', 'RNA.Immature.Goblet_Uninflamed', 'RNA.Endothelial_Uninflamed', 'RNA.Cycling.Monocytes_Uninflamed', 'RNA.Pericytes_Uninflamed', 'RNA.Immature.Enterocytes.1_Healthy', 'RNA.Cycling.T_Inflamed', 'RNA.CD69..Mast_Uninflamed', 'RNA.CD4..PD1._Uninflamed', 'RNA.Endothelial_Healthy'}\n" ] } ], "source": [ "print(set(df_cell_log2.columns))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Next we select healthy/uninflamed average expression data in the 5 selected cell types." ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "df_macrophage_healthy = df_cell_log2['RNA.Macrophages_Healthy']\n", "df_macrophage_uninflamed = df_cell_log2['RNA.Macrophages_Uninflamed']\n", "\n", "df_DC1_healthy = df_cell_log2['RNA.DC1_Healthy']\n", "df_DC1_uninflamed = df_cell_log2['RNA.DC1_Uninflamed']\n", "\n", "df_Treg_healthy = df_cell_log2['RNA.Tregs_Healthy']\n", "df_Treg_uninflamed = df_cell_log2['RNA.Tregs_Uninflamed'] \n", "\n", "df_myofibroblast_healthy = df_cell_log2['RNA.Myofibroblasts_Healthy']\n", "df_myofibroblast_uninflamed = df_cell_log2['RNA.Myofibroblasts_Uninflamed']\n", "\n", "df_goblet_healthy = df_cell_log2['RNA.Goblet_Healthy']\n", "df_goblet_uninflamed = df_cell_log2['RNA.Goblet_Uninflamed']\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For calcualting the intercellular interactions we can store theese dataframes in a dictionarry." ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "healthy_cell_types = {\"DC1\": df_DC1_healthy,\"Macrophage\": df_macrophage_healthy, \"Goblet\": df_goblet_healthy,\n", " \"Myofibroblast\": df_myofibroblast_healthy, 'Treg': df_Treg_healthy}\n", "\n", "uninflamed_cell_types = {\"DC1\": df_DC1_uninflamed,\"Macrophage\": df_macrophage_uninflamed, \"Goblet\": df_goblet_uninflamed,\n", " \"Myofibroblast\": df_myofibroblast_uninflamed, 'Treg': df_Treg_uninflamed} " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Next we can use the intercellular interactions." ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "# create dictionaries for source-target interactions and their annotations from OmniPath\n", "interactions = {}\n", "interaction_annotation = {}\n", "\n", "for i, interaction in filtered_intercell_network.iterrows():\n", " if interaction[\"genesymbol_intercell_source\"] not in interactions:\n", " interactions[interaction[\"genesymbol_intercell_source\"]] = []\n", " if interaction[\"genesymbol_intercell_target\"] not in interactions[interaction[\"genesymbol_intercell_source\"]]:\n", " interactions[interaction[\"genesymbol_intercell_source\"]].append(interaction[\"genesymbol_intercell_target\"])\n", " source_tupple = (interaction[\"genesymbol_intercell_source\"],interaction[\"category_intercell_source\"])\n", " target_tupple = (interaction[\"genesymbol_intercell_target\"],interaction[\"category_intercell_target\"])\n", " if source_tupple not in interaction_annotation:\n", " interaction_annotation[source_tupple] = []\n", " if target_tupple not in interaction_annotation[source_tupple]:\n", " interaction_annotation[source_tupple].append(target_tupple)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create all of the possible interactions of cells (independently from the condition)\n", "Important: directionality (A-B and B-A are different)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "def create_interactions(cells, healthy_cell_types,interactions):\n", " healthy_interactions = set()\n", " for source in healthy_cell_types[cells[0]].keys():\n", " # selecting those ones which play role in intercellular communication as a transmitter\n", " if source in interactions.keys():\n", " # iterating through target cell type expressed genes\n", " for target in healthy_cell_types[cells[1]].keys():\n", "\n", " # selecting those ones which play role in intercellular communication as a receiver\n", " if target in interactions[source]:\n", " if str(healthy_cell_types[cells[1]][target]) != 'nan' and str(healthy_cell_types[cells[0]][source]) != 'nan':\n", " healthy_interactions.add((source, target))\n", " return healthy_interactions\n", " " ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "#Creating dictinaries dataframe for healthy and UC interactions\n", "healthy_intercell_tupplellist = []\n", "uc_intercell_tupplelist = []" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "DC1 Macrophage\n", "DC1 Goblet\n", "DC1 Myofibroblast\n", "DC1 Treg\n", "Macrophage DC1\n", "Macrophage Goblet\n", "Macrophage Myofibroblast\n", "Macrophage Treg\n", "Goblet DC1\n", "Goblet Macrophage\n", "Goblet Myofibroblast\n", "Goblet Treg\n", "Myofibroblast DC1\n", "Myofibroblast Macrophage\n", "Myofibroblast Goblet\n", "Myofibroblast Treg\n", "Treg DC1\n", "Treg Macrophage\n", "Treg Goblet\n", "Treg Myofibroblast\n" ] } ], "source": [ "for i in healthy_cell_types.keys():\n", " for j in healthy_cell_types.keys():\n", " if i!=j:\n", " print(i,j)\n", " # storing cell-type specific connections in sets\n", " healthy_interactions = create_interactions([i,j], healthy_cell_types, interactions)\n", " UC_interactions = create_interactions([i,j], uninflamed_cell_types,interactions)\n", " # writing out the condition specific interactions\n", " healthy_only = healthy_interactions.difference(UC_interactions)\n", " UC_only = UC_interactions.difference(healthy_interactions)\n", " #Adding the intercellular interactions\n", " healthy_intercell_tupplellist.append((i,j,len(healthy_only)))\n", " uc_intercell_tupplelist.append((i,j,len(UC_only)))\n", "\n", " with open(i + \"_\" + j + \"_healthy_only.txt\", 'w') as output_file_1:\n", " #print(UC_only)\n", " for inter in healthy_only:\n", " for source_annotation in interaction_annotation:\n", " if inter[0] == source_annotation[0]:\n", " for target_annotation in interaction_annotation[source_annotation]:\n", " if inter[1] == target_annotation[0]:\n", " output_file_1.write(source_annotation[0] + \",\" + source_annotation[1] + \",\" \n", " + target_annotation[0] + \",\" + target_annotation[1] + \"\\n\")\n", " with open(i + \"_\" + j + \"_UC_only.txt\", 'w') as output_file_2:\n", " for inter in UC_only:\n", " for source_annotation in interaction_annotation:\n", " if inter[0] == source_annotation[0]:\n", " for target_annotation in interaction_annotation[source_annotation]:\n", " if inter[1] == target_annotation[0]:\n", " output_file_2.write(source_annotation[0] + \",\" + source_annotation[1] + \",\"\n", " + target_annotation[0] + \",\" + target_annotation[1] + \"\\n\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Next we make the graphs of the interactions between the cells." ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "g_healthy = ig.Graph.TupleList(healthy_intercell_tupplellist, weights=True, directed=True)\n", "g_uc = ig.Graph.TupleList(uc_intercell_tupplelist, weights=True, directed=True)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('DC1', 'Macrophage', 912),\n", " ('DC1', 'Goblet', 781),\n", " ('DC1', 'Myofibroblast', 731),\n", " ('DC1', 'Treg', 557),\n", " ('Macrophage', 'DC1', 568),\n", " ('Macrophage', 'Goblet', 689),\n", " ('Macrophage', 'Myofibroblast', 617),\n", " ('Macrophage', 'Treg', 501),\n", " ('Goblet', 'DC1', 457),\n", " ('Goblet', 'Macrophage', 652),\n", " ('Goblet', 'Myofibroblast', 483),\n", " ('Goblet', 'Treg', 438),\n", " ('Myofibroblast', 'DC1', 437),\n", " ('Myofibroblast', 'Macrophage', 558),\n", " ('Myofibroblast', 'Goblet', 483),\n", " ('Myofibroblast', 'Treg', 387),\n", " ('Treg', 'DC1', 491),\n", " ('Treg', 'Macrophage', 800),\n", " ('Treg', 'Goblet', 682),\n", " ('Treg', 'Myofibroblast', 616)]" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "healthy_intercell_tupplellist" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we can visualise the interactions.\n", "Igraph contain the visulasitation parameters as a dictionarry. You can also wirte out the edgelist to visualise it in cytoscape." ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "g_healthy.es" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "visual_style = {}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Below are the iGraph visual parameters which you can play with for your intercellular network." ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "# Curve the edges\n", "visual_style[\"edge_curved\"] = True\n", "# Set the layout\n", "my_layout = g_healthy.layout_circle(order=[\"Goblet\",\"Treg\",\"Myofibroblast\",\"Macrophage\",\"DC1\"])\n", "visual_style[\"layout\"] = my_layout\n", "#Add annoation\n", "visual_style[\"vertex_label\"] = g_healthy.vs[\"name\"]\n", "#Calcualte the edge wheight relative the number of edges\n", "visual_style[\"edge_width\"] = 0.01 * np.array(g_healthy.es[\"weight\"])\n", "\n", "#Setting the vertex visualistation parameters -colours and label position\n", "visual_style[\"vertex_label_size\"] = 30\n", "visual_style[\"vertex_color\"] = \"grey\"\n", "visual_style[\"vertex_label_dist\"] = 0.75\n", "visual_style[\"vertex_size\"] = 40\n", "\n", "#Setting the edge visualisation parameters\n", "visual_style[\"edge_color\"] = \"#004C66\" #hexa code for blue\n", "visual_style[\"edge_arrow_size\"] = 2\n", "\n", "#Setting the visualistation place. igraph need margin yo work\n", "visual_style[\"bbox\"] = (600, 600)\n", "visual_style[\"margin\"] = 100" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 38, "metadata": { "image/svg+xml": { "isolated": true } }, "output_type": "execute_result" } ], "source": [ "ig.plot(g_healthy, \"healthy.png\", **visual_style)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we can do the same for the uninflmaed UC cells." ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "# Curve the edges\n", "visual_style[\"edge_curved\"] = True\n", "# Set the layout - we keep the same layout for comapriosn\n", "visual_style[\"layout\"] = my_layout\n", "#Add annoation\n", "visual_style[\"vertex_label\"] = g_uc.vs[\"name\"]\n", "#Calcualte the edge wheight relative the number of edges\n", "visual_style[\"edge_width\"] = 0.01 * np.array(g_uc.es[\"weight\"])\n", "\n", "#Setting the vertex visualistation parameters -colours and label position\n", "visual_style[\"vertex_label_size\"] = 30\n", "visual_style[\"vertex_color\"] = \"grey\"\n", "visual_style[\"vertex_label_dist\"] = 0.75\n", "visual_style[\"vertex_size\"] = 40\n", "\n", "#Setting the edge visualisation parameters\n", "visual_style[\"edge_color\"] = \"#CE1612\" #hexa code for red\n", "visual_style[\"edge_arrow_size\"] = 2\n", "\n", "#Setting the visualistation place. igraph need margin yo work\n", "visual_style[\"bbox\"] = (600, 600)\n", "visual_style[\"margin\"] = 100" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 40, "metadata": { "image/svg+xml": { "isolated": true } }, "output_type": "execute_result" } ], "source": [ "ig.plot(g_uc, \"UC_uninflamed.png\", **visual_style)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Below you can see what kind of data we have used." ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['builtins',\n", " 'builtins',\n", " 'pandas',\n", " 'igraph',\n", " 'os',\n", " 'numpy',\n", " 'omnipath',\n", " 'seaborn',\n", " 'types']" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import types\n", "def imports():\n", " for name, val in globals().items():\n", " if isinstance(val, types.ModuleType):\n", " yield val.__name__\n", "list(imports())" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "pandas 1.1.4\n", "igraph 0.8.3\n", "numpy 1.18.5\n", "omnipath 1.0.0\n", "seaborn 0.11.0\n" ] } ], "source": [ "print('\\n'.join(f'{m.__name__} {m.__version__}' for m in globals().values() if getattr(m, '__version__', None)))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.0" } }, "nbformat": 4, "nbformat_minor": 4 }