{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# What is the contribution of paralogs to SL pairs that share protein domains?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from intermine.webservice import Service\n",
    "import numpy as np\n",
    "import scipy.io\n",
    "import seaborn as sns\n",
    "from scipy import stats, optimize, interpolate\n",
    "import pandas as pd\n",
    "from collections import defaultdict \n",
    "import math\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy.stats import norm, lognorm\n",
    "from scipy import stats\n",
    "import matplotlib.cm as cm\n",
    "import matplotlib.mlab as mlab\n",
    "import os, fnmatch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For Binder: uncomment the next line to move up one directory *before* the\n",
    "# dataset paths are resolved.\n",
    "# os.chdir('../')\n",
    "\n",
    "# `__file__` is not defined inside a notebook, so the datasets are resolved\n",
    "# against the current working directory (an absolute path).\n",
    "script_dir = os.getcwd()\n",
    "rel_path_sl = 'datasets/data-synthetic-lethals.xlsx'\n",
    "rel_path_paralogs = 'datasets/paralogs-all-unique-SL-pairs.xlsx'\n",
    "\n",
    "abs_file_path_sl = os.path.join(script_dir, rel_path_sl)\n",
    "abs_file_path_paralogs = os.path.join(script_dir, rel_path_paralogs)\n",
    "\n",
    "# The first spreadsheet row holds the column names of the SL table.\n",
    "data_sl = pd.read_excel(abs_file_path_sl, header=0)\n",
    "all_paralogs_from_sl = pd.read_excel(abs_file_path_paralogs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inspecting the paralog pairs\n",
    "\n",
    "- This is a first check of whether an SL pair shares domains simply because the two genes are also paralogs."
] },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the 'Unnamed: 0' column, label the two remaining columns, remove rows\n",
    "# with a missing entry and renumber the surviving rows 0..n-1.\n",
    "query_paralogs_pd = all_paralogs_from_sl.drop(columns='Unnamed: 0')\n",
    "query_paralogs_pd.columns = ['name-gene', 'name-paralogue']\n",
    "query_paralogs_pd = query_paralogs_pd.dropna().reset_index(drop=True)\n",
    "\n",
    "# Second name for the same cleaned table, kept so code that refers to it still works.\n",
    "query_paralogs_pd_withoutnan = query_paralogs_pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Collecting the indexes of the paralog pairs that are also synthetic lethal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# All (query, target) gene-name pairs present in the SL table, for O(1) lookups.\n",
    "sl_pairs = set(zip(data_sl['gene-query-name'], data_sl['gene-target-name']))\n",
    "\n",
    "# Row labels of the paralog pairs whose (gene, paralogue) combination is an SL pair.\n",
    "# Every row is tested with its own paralogue and contributes its own label, so genes\n",
    "# or paralogues that occur in several rows are handled row by row.\n",
    "# Only the gene-as-query / paralogue-as-target orientation is checked.\n",
    "indexes_sl_query = [\n",
    "    row_label\n",
    "    for row_label, gene, paralogue in zip(\n",
    "        query_paralogs_pd.index,\n",
    "        query_paralogs_pd['name-gene'],\n",
    "        query_paralogs_pd['name-paralogue'],\n",
    "    )\n",
    "    if (gene, paralogue) in sl_pairs\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Marking with a 1 the paralog pairs that are also SL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1 where the row label is listed in `indexes_sl_query`, 0 otherwise (integer column).\n",
    "query_paralogs_pd['sL'] = query_paralogs_pd.index.isin(indexes_sl_query).astype(int)\n",
    "\n",
    "paralogs_sl_pd = query_paralogs_pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | name-gene | \n", "name-paralogue | \n", "sL | \n", "
---|---|---|---|
2 | \n", "ADE16 | \n", "ADE17 | \n", "1 | \n", "
6 | \n", "AFT1 | \n", "AFT2 | \n", "1 | \n", "
8 | \n", "AGA1 | \n", "FIG2 | \n", "1 | \n", "
11 | \n", "AIR1 | \n", "AIR2 | \n", "1 | \n", "
16 | \n", "ALP1 | \n", "CAN1 | \n", "1 | \n", "
\n", " | name-gene | \n", "name-paralogue | \n", "sL | \n", "
---|---|---|---|
0 | \n", "ADE16 | \n", "ADE17 | \n", "1 | \n", "
1 | \n", "AFT1 | \n", "AFT2 | \n", "1 | \n", "
2 | \n", "AGA1 | \n", "FIG2 | \n", "1 | \n", "
3 | \n", "AIR1 | \n", "AIR2 | \n", "1 | \n", "
4 | \n", "ALP1 | \n", "CAN1 | \n", "1 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
171 | \n", "YCK1 | \n", "YCK2 | \n", "1 | \n", "
172 | \n", "YPC1 | \n", "YDC1 | \n", "1 | \n", "
173 | \n", "YPK1 | \n", "YPK2 | \n", "1 | \n", "
174 | \n", "YPS1 | \n", "MKC7 | \n", "1 | \n", "
175 | \n", "YPT31 | \n", "YPT32 | \n", "1 | \n", "
176 rows × 3 columns
\n", "