{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# What is the contribution of paralogs to SL pairs that share protein domains?" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from intermine.webservice import Service\n", "import numpy as np\n", "import scipy.io\n", "import seaborn as sns\n", "from scipy import stats, optimize, interpolate\n", "import pandas as pd\n", "from collections import defaultdict \n", "import math\n", "import matplotlib.pyplot as plt\n", "from scipy.stats import norm, lognorm\n", "from scipy import stats\n", "import matplotlib.cm as cm\n", "import matplotlib.mlab as mlab\n", "import os, fnmatch" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# os.chdir('../') #<-- for binder os.chdir('../')\n", "\n", "# A notebook kernel defines no __file__, and os.path.dirname('__file__') on the quoted\n", "# literal always returns '', so anchor the dataset paths on the working directory instead.\n", "# This is read after the optional chdir above so that uncommenting it still takes effect.\n", "script_dir = os.getcwd()\n", "rel_path_sl = 'datasets/data-synthetic-lethals.xlsx'\n", "rel_path_paralogs = 'datasets/paralogs-all-unique-SL-pairs.xlsx'\n", "\n", "abs_file_path_sl = os.path.join(script_dir, rel_path_sl)\n", "abs_file_path_paralogs = os.path.join(script_dir, rel_path_paralogs)\n", "\n", "# header=0: the first spreadsheet row holds the column names.\n", "data_sl = pd.read_excel(abs_file_path_sl, header=0)\n", "all_paralogs_from_sl = pd.read_excel(abs_file_path_paralogs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Inspecting the paralogs pairs\n", "\n", "- this is the first check to analyze if the reason why a SL pair shares domains is because they are also paralogs." 
] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "query_paralogs_pd = all_paralogs_from_sl.drop(columns='Unnamed: 0')\n", "query_paralogs_pd.columns = ['name-gene', 'name-paralogue']\n", "# Drop rows with a missing entry and renumber the rest 0..n-1, so that index labels\n", "# double as row positions in the cells below.\n", "query_paralogs_pd_withoutnan = query_paralogs_pd.dropna().reset_index(drop=True)\n", "query_paralogs_pd = query_paralogs_pd_withoutnan" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Taking the indexes of the paralogs pairs that are also synthetic lethal" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Ordered (query, target) gene pairs listed in the SL table.\n", "sl_pairs = set(zip(data_sl['gene-query-name'], data_sl['gene-target-name']))\n", "\n", "# Test every row on its own (gene, paralogue) pair and record that row's own index.\n", "# Resolving the paralogue through the first row with the same gene name, and the index\n", "# through the first row with the same paralogue, flags the wrong row when a name repeats.\n", "indexes_sl_query = [\n", "    row_index\n", "    for row_index, gene, paralog in zip(\n", "        query_paralogs_pd.index,\n", "        query_paralogs_pd['name-gene'],\n", "        query_paralogs_pd['name-paralogue'],\n", "    )\n", "    if (gene, paralog) in sl_pairs\n", "]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Putting 1's if the paralog pair is also SL" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Integer flag per row: 1 when the paralog pair is also synthetic lethal, 0 otherwise.\n", "# The index was renumbered 0..n-1 above, so the collected labels are valid positions.\n", "sL_values = np.zeros(len(query_paralogs_pd), dtype=int)\n", "sL_values[indexes_sl_query] = 1\n", "query_paralogs_pd['sL'] = sL_values\n", "\n", "paralogs_sl_pd = query_paralogs_pd" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
name-genename-paraloguesL
2ADE16ADE171
6AFT1AFT21
8AGA1FIG21
11AIR1AIR21
16ALP1CAN11
\n", "
" ], "text/plain": [ " name-gene name-paralogue sL\n", "2 ADE16 ADE17 1\n", "6 AFT1 AFT2 1\n", "8 AGA1 FIG2 1\n", "11 AIR1 AIR2 1\n", "16 ALP1 CAN1 1" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Keep only the paralog pairs whose sL flag is 1.\n", "is_sl_pair = paralogs_sl_pd['sL'] == 1\n", "sl_that_are_paralogs = paralogs_sl_pd[is_sl_pair]\n", "sl_that_are_paralogs.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
name-genename-paraloguesL
0ADE16ADE171
1AFT1AFT21
2AGA1FIG21
3AIR1AIR21
4ALP1CAN11
............
171YCK1YCK21
172YPC1YDC11
173YPK1YPK21
174YPS1MKC71
175YPT31YPT321
\n", "

176 rows × 3 columns

\n", "
" ], "text/plain": [ " name-gene name-paralogue sL\n", "0 ADE16 ADE17 1\n", "1 AFT1 AFT2 1\n", "2 AGA1 FIG2 1\n", "3 AIR1 AIR2 1\n", "4 ALP1 CAN1 1\n", ".. ... ... ..\n", "171 YCK1 YCK2 1\n", "172 YPC1 YDC1 1\n", "173 YPK1 YPK2 1\n", "174 YPS1 MKC7 1\n", "175 YPT31 YPT32 1\n", "\n", "[176 rows x 3 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Display-only view renumbered 0..n-1; sl_that_are_paralogs itself is left unchanged.\n", "sl_that_are_paralogs.reset_index(drop=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## What is the contribution of paralogs to SL that share protein domains?" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "pairs_sL=np.load('pairs-sL-that-share-domains.npy')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Build each unordered pair once instead of on every inner-loop iteration.\n", "paralog_pair_sets = [set(pair) for pair in sl_that_are_paralogs.iloc[:, 0:2].values.tolist()]\n", "domain_pair_sets = [set(pair) for pair in pairs_sL]\n", "\n", "# Same traversal order as a nested loop: paralog pairs outside, domain-sharing pairs inside.\n", "shared_sL_paralogs = [\n", "    pairs_sL[j]\n", "    for paralog_set in paralog_pair_sets\n", "    for j, domain_set in enumerate(domain_pair_sets)\n", "    if paralog_set == domain_set\n", "]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The contribution of paralogs to the SL pairs that shared domains is = 1.6042780748663101 %\n", "Only 12 out of 176 paralogs that are SL , share annotated protein domains.\n", "The number of paralogs that are SL out of the total number of paralogs is 176 out of 720 = 24.444444444444443 %\n", "The contribution of paralogs to the total number of SL pairs is = 0.9848357674444631 %\n", "The number of SL that share domains out of the total number of SL pairs is = 4.185552011638968 %\n" ] } ], "source": [ "# Denominator for the two 'total number of SL pairs' percentages below.\n", "# NOTE(review): hard-coded; presumably the number of unique SL pairs in the dataset - confirm against data_sl.\n", "TOTAL_SL_PAIRS = 17871\n", "\n", "print('The contribution of paralogs to the SL pairs that shared domains is =', 100*len(shared_sL_paralogs)/len(pairs_sL),'%')\n", "print('Only',len(shared_sL_paralogs),'out of',len(sl_that_are_paralogs),'paralogs that are SL , share annotated protein domains.')\n", 
"\n", "print('The number of paralogs that are SL out of the total number of paralogs is',len(sl_that_are_paralogs),'out of',len(paralogs_sl_pd),'=',100*len(sl_that_are_paralogs)/len(paralogs_sl_pd),'%')\n", "\n", "print('The contribution of paralogs to the total number of SL pairs is =', 100*len(sl_that_are_paralogs)/TOTAL_SL_PAIRS,'%')\n", "print('The number of SL that share domains out of the total number of SL pairs is =',100*len(pairs_sL)/TOTAL_SL_PAIRS,'%')" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# Pie chart\n", "labels = ['SL-paralogs','Paralogs non SL']\n", "# Fraction of all paralog pairs that are also SL; the second slice is the remainder.\n", "sl_fraction = len(sl_that_are_paralogs)/len(paralogs_sl_pd)\n", "sizes = [sl_fraction, 1 - sl_fraction]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "filenames": { "image/png": "C:\\Users\\linigodelacruz\\Documents\\PhD_2018\\Documentation\\jupyter-book-deployment\\jupyter-book\\mini_book\\docs\\_build\\jupyter_execute\\paralogs-and-SL-pairs_16_0.png" } }, "output_type": "display_data" } ], "source": [ "#colors\n", "colors = ['#ff9999','#66b3ff','#99ff99','#ffcc99']\n", " \n", "fig1, ax1 = plt.subplots()\n", "patches, texts, autotexts = ax1.pie(sizes, colors = colors, labels=labels, autopct='%1.1f%%', startangle=90)\n", "for text in texts:\n", "    text.set_color('grey')\n", "for autotext in autotexts:\n", "    autotext.set_color('grey')\n", "# Equal aspect ratio ensures that pie is drawn as a circle\n", "ax1.axis('equal') \n", "plt.tight_layout()" ] } ], "metadata": { "jupytext": { "text_representation": { "extension": ".md", "format_name": "myst" } }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.8" }, "source_map": [ 10, 14, 31, 43, 50, 56, 60, 72, 76, 86, 91, 93, 97, 102, 112, 123, 129 ] }, "nbformat": 4, "nbformat_minor": 4 }