编译chromium缺少LASTCHANGE问题

遇到WebKit项目中错误提示'../../build/util/LASTCHANGE.blink',neededby'gen/webkit_version.h',missingandnoknownruletomakeit时,通过在src目录执行特定的两个脚本来解决此问题。步骤包括运行两个脚本: 1. ./build/util/lastchange.py build/util/LASTCHANGE 2. ./build/util/lastchange.py-sthird_party/WebKit/-obuild/util/LASTCHANGE.blink 遵循这些操作即可成功修复错误。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

错误: error: '../../build/util/LASTCHANGE.blink', needed by 'gen/webkit_version.h', missing and no known rule to make it

解决:src目录执行两个脚本

./build/util/lastchange.py  build/util/LASTCHANGE

./build/util/lastchange.py -s third_party/WebKit/ -o build/util/LASTCHANGE.blink

开发者提供了一个脚本generate_ptm_coordinates.py,请问我要怎么运行? import numpy as np import pandas as pd from ExonPTMapper import mapping from ExonPTMapper import config as mapper_config from ptm_pose import pose_config from Bio import SeqIO import codecs import gzip import os import datetime import pyliftover from tqdm import tqdm def check_constitutive(ptm, nonconstitutive_list): """ For a given list of ptms, check if any of the ptms are found in the list of nonconstitutive ptms, meaning that they have previously been found to be missing from isoforms in Ensembl Parameters ---------- ptm : list List of PTMs to check (each ptm should be in the form of "UniProtID_ResiduePosition" (e.g. "P12345-1_Y100")) nonconstitutive_list : list List of PTMs that have been found to be nonconstitutive (based on data from ptm_info object generated by ExonPTMapper) """ if ptm in nonconstitutive_list: return False else: return True def extract_ptm_position_of_primary_isoform(row): """ For a given row in the ptm_coordinates dataframe, extract the position of the PTM in the primary isoform (canonical if available, else first isoform in list) """ pass def get_unique_ptm_info(ptm_coordinates): """ For a given row in the ptm_coordinates dataframe, isolate PTM entries corresponding to the canonical isoform ID, if available. If not, use the first isoform ID in the list. Further, indicate whether the entry corresponds to a canonical or alternative isoform. Parameters ---------- ptm_coordinates : pd.DataFrame DataFrame containing PTM coordinates, generated by ExonPTMapper package Returns ------- pd.DataFrame DataFrame containing PTM coordinates with additional columns indicating the UniProtKB accession number, the isoform ID, the type of isoform (canonical or alternative), the residue of the PTM, the position of the PTM in the isoform, and any alternative entries that also contain the PTM """ accession_list = [] #list of UniProtKB/Swiss-Prot accessions residue_list = [] position_list = [] isoform_list = [] isoform_type = [] for i,row in ptm_coordinates.iterrows(): ptm_entries = row['Source of PTM'].split(';') residue = ptm_entries[0].split('_')[1][0] #iterate through each PTM entry and extract any that are associated with a canonical isoform. This will usually only be one, but in rare cases a PTM may be associated with multiple genes found_in_canonical = False positions = [] accessions = [] isoform_entries = [] for ptm in ptm_entries: if ptm.split('_')[0] in mapper_config.canonical_isoIDs.values(): #check if the uniprot isoform ID is a canonical isoform. if so, add the position to the list of positions positions.append(ptm.split('_')[1][1:]) accessions.append(ptm.split('-')[0]) #uniprot accession number isoform_entries.append(ptm.split('_')[0]) #uniprot isoform ID found_in_canonical = True #indicate that there is a PTM found in the canonical isoform #check if position in canonical was found. If so, join the positions into a single string. If not, use the position associated with the first listed isoform if found_in_canonical: positions = ';'.join(positions) accessions = ';'.join(accessions) isoform_entries = ';'.join(isoform_entries) isoform_type.append('Canonical') else: positions = ptm_entries[0].split('_')[1][1:] accessions = ptm_entries[0].split('-')[0] isoform_entries = ptm_entries[0].split('_')[0] isoform_type.append('Alternative') accession_list.append(accessions) residue_list.append(residue) position_list.append(positions) isoform_list.append(isoform_entries) ptm_coordinates['UniProtKB Accession'] = accession_list ptm_coordinates['Isoform ID'] = isoform_list ptm_coordinates['Isoform Type'] = isoform_type ptm_coordinates['Residue'] = residue_list ptm_coordinates['PTM Position in Isoform'] = position_list return ptm_coordinates def convert_coordinates(ptm_coordinates, from_coord = 'hg38', to_coord = 'hg19'): """ Given the ptm_coordinates dataframe, convert the genomic location of the PTMs from one coordinate system to another (e.g. hg38 to hg19, hg19 to hg38, etc.) Parameters ---------- ptm_coordinates : pd.DataFrame DataFrame containing PTM coordinates from_coord : str Coordinate system to convert from (e.g. 'hg38', 'hg19', 'hg18') to_coord : str Coordinate system to convert to (e.g. 'hg38', 'hg19', 'hg18') """ # convert coordinates to hg19 and hg38 new_coords = [] liftover_object = pyliftover.LiftOver(f'{from_coord}',f'{to_coord}') for i, row in tqdm(ptm_coordinates.iterrows(), total = ptm_coordinates.shape[0], desc = f'Converting from {from_coord} to {to_coord} coordinates'): new_coords.append(mapping.convert_genomic_coordinates(row[f'Gene Location ({from_coord})'], row['Chromosome/scaffold name'], row['Strand'], from_type = f'{from_coord}', to_type = f'{to_coord}', liftover_object = liftover_object)) return new_coords residue_dict = {'R': 'arginine', 'H':'histidine', 'K':'lysine', 'D':'aspartic acid', 'E':'glutamic acid', 'S': 'serine', 'T':'threonine', 'N':'asparagine', 'Q':'glutamine', 'C':'cysteine', 'U':'selenocystein', 'G':'glycine', 'P':'proline', 'A':'alanine', 'V':'valine', 'I':'isoleucine', 'L':'leucine', 'M':'methionine', 'F':'phenylalanine', 'Y':'tyrosine', 'W':'tryptophan'} def convert_modification_shorthand(mod, res, mod_group_type = 'fine'): if mod_group_type == 'fine': if mod == 'p': mod_name = f'Phospho{residue_dict[res]}' elif mod == 'ub': mod_name = 'Ubiquitination' elif mod == 'sm': mod_name = 'Sumoylation' elif mod == 'ga': if res == 'N': mod_name = 'N-Glycosylation' else: #if you want a more general classification, this and T can be either just O-GalNAc or O-glycosylation mod_name = 'O-GalNAc ' + residue_dict[res].capitalize() elif mod == 'gl': mod_name = 'O-GlcNAc ' + residue_dict[res].capitalize() elif mod == 'm1' or mod == 'me': mod_name = 'Methylation' elif mod == 'm2': mod_name = 'Dimethylation' elif mod == 'm3': mod_name = 'Trimethylation' elif mod == 'ac': mod_name = 'Acetylation' else: raise ValueError("ERROR: don't recognize PTM type %s"%(type)) elif mod_group_type == 'coarse': if mod == 'p': mod_name = 'Phosphorylation' elif mod == 'ub': mod_name = 'Ubiquitination' elif mod == 'sm': mod_name = 'Sumoylation' elif mod == 'ga' or mod == 'gl': mod_name = 'Glycosylation' elif mod == 'm1' or mod == 'me' or mod == 'm2' or mod == 'm3': mod_name = 'Methylation' elif mod == 'ac': mod_name = 'Acetylation' return mod_name def process_PSP_df(file, compressed = False, organism = None, include_flank = False, include_domain = False, include_MS = False, include_LTP = False, mod_group_type = 'fine'): """ Process the PhosphositePlus file into a dataframe with the Uniprot ID, residue, position, and modification, and any extra columns that are requested Parameters ---------- file : str The file to read in compressed : bool If the file is compressed or not organism : str The organism to filter the data by include_flank : bool If True, include the flanking amino acids include_domain : bool If True, include the domain include_MS : bool If True, include the MS_LIT and MS_CST columns include_LTP : bool If True, include the LT column (low throughput) mod_group_type : str The type of modification grouping to use (fine or coarse) """ if compressed: df = pd.read_csv(file, sep='\t', skiprows=3, compression='gzip') else: df = pd.read_csv(file, sep='\t', skiprows=3) df['Residue'] = df['MOD_RSD'].apply(lambda x: x.split('-')[0][0]) df['Position'] = df['MOD_RSD'].apply(lambda x: int(x.split('-')[0][1:])) df['Modification'] = df['MOD_RSD'].str.split('-').str[1] df['Modification'] = df.apply(lambda x: convert_modification_shorthand(x['Modification'], x['Residue'], mod_group_type=mod_group_type), axis=1) if organism is not None: df = df[df['ORGANISM'] == organism] extra_cols = [] if organism is not None: df = df[df['ORGANISM'] == organism] else: extra_cols += ['ORGANISM'] if include_flank: extra_cols += ['SITE_+/-7_AA'] if include_domain: extra_cols += ['DOMAIN'] if include_MS: extra_cols += ['MS_LIT'] extra_cols += ['MS_CST'] if include_LTP: extra_cols += ['LT_LIT'] df = df[['ACC_ID', 'Residue', 'Position', 'Modification'] + extra_cols] return df def combine_PSP_dfs(phosphositeplus_dir, compressed = True, organism = None, include_flank = False, include_domain = False, include_MS = False, include_LTP = False, mod_group_type = 'fine'): extension = '.gz' if compressed else '' phospho = process_PSP_df(phosphositeplus_dir+'Phosphorylation_site_dataset'+extension, compressed = compressed, organism = organism, include_flank = include_flank, include_domain = include_domain, include_MS = include_MS,include_LTP=include_LTP, mod_group_type=mod_group_type) ubiq = process_PSP_df(phosphositeplus_dir+'Ubiquitination_site_dataset'+extension, compressed = compressed, organism = organism, include_flank = include_flank, include_domain = include_domain, include_MS = include_MS, include_LTP=include_LTP, mod_group_type=mod_group_type) sumo = process_PSP_df(phosphositeplus_dir+'Sumoylation_site_dataset'+extension, compressed = compressed, organism = organism, include_flank = include_flank, include_domain = include_domain, include_MS = include_MS, include_LTP=include_LTP, mod_group_type=mod_group_type) galnac = process_PSP_df(phosphositeplus_dir+'O-GalNAc_site_dataset'+extension, compressed = compressed, organism = organism, include_flank = include_flank, include_domain = include_domain, include_MS = include_MS, include_LTP=include_LTP, mod_group_type=mod_group_type) glcnac = process_PSP_df(phosphositeplus_dir+'O-GlcNAc_site_dataset'+extension, compressed = compressed, organism = organism, include_flank = include_flank, include_domain = include_domain, include_MS = include_MS, include_LTP=include_LTP, mod_group_type=mod_group_type) meth = process_PSP_df(phosphositeplus_dir+'Methylation_site_dataset'+extension, compressed = compressed, organism = organism, include_flank = include_flank, include_domain = include_domain, include_MS = include_MS, include_LTP=include_LTP, mod_group_type=mod_group_type) acetyl = process_PSP_df(phosphositeplus_dir+'Acetylation_site_dataset'+extension, compressed = compressed, organism = organism, include_flank = include_flank, include_domain = include_domain, include_MS = include_MS, include_LTP=include_LTP, mod_group_type=mod_group_type) df = pd.concat([phospho, ubiq, sumo, galnac, glcnac, meth, acetyl]) return df def extract_num_studies(psp_dir, pscout_data = None): psp_df = combine_PSP_dfs(psp_dir, compressed = True, organism = 'human', include_MS = True, include_LTP = True, mod_group_type = 'coarse') #get canonical isoform IDs canonical_ids = mapper_config.translator[mapper_config.translator['UniProt Isoform Type'] == 'Canonical'][['UniProtKB/Swiss-Prot ID', 'UniProtKB isoform ID']].set_index('UniProtKB isoform ID').squeeze().to_dict() #couple rare cases where the canonical isoform is listed with its isoform ID, which conflicts with how I processed the data. convert these ideas to base uniprot ID psp_df_isoids = psp_df[psp_df['ACC_ID'].isin(canonical_ids.keys())].copy() psp_correctids = psp_df[~psp_df['ACC_ID'].isin(canonical_ids.keys())].copy() psp_df_isoids['ACC_ID'] = psp_df_isoids['ACC_ID'].map(canonical_ids) psp_df = pd.concat([psp_df_isoids, psp_correctids]) #groupby number of experiments by PTM site and modification, taking the max value (some rare cases where entries are basically identical but have different numbers of experiments) psp_df['PTM'] = psp_df['ACC_ID'] + '_' + psp_df['Residue'] + psp_df['Position'].astype(str) psp_df = psp_df.groupby(['PTM', 'Modification'], as_index = False)[['MS_LIT', 'MS_CST', 'LT_LIT']].max() psp_df = psp_df.rename(columns = {'Modification': 'Modification Class'}) if pscout_data is not None: pscout_experiments = pscout_data[pscout_data['Number of Experiments'] > 0] pscout_experiments = pscout_experiments.dropna(subset = 'Modification Class') psp_df = psp_df.merge(pscout_experiments[['PTM', 'Number of Experiments']], on = 'PTM', how = 'outer') psp_df = psp_df.fillna(0) psp_df['MS_LIT'] = psp_df['MS_LIT'] + psp_df['Number of Experiments'] psp_df = psp_df.drop(columns = 'Number of Experiments') return psp_df def append_num_studies(ptm_coordinates, psp_df): #remove ms columns if present ptm_coordinates = ptm_coordinates.drop(columns = [i for i in ['MS_LIT', 'MS_CST', 'LT_LIT'] if i in ptm_coordinates.columns]) #add PTM column to PTM coordinates, allowing for alternative isoform IDs ptm_coordinates['PTM'] = ptm_coordinates.apply(lambda x: x['Isoform ID'] + '_'+ x['Residue'] + str(int(x['PTM Position in Isoform'])) if x['Isoform Type'] == 'Alternative' else x['UniProtKB Accession'] + '_'+ x['Residue'] + str(int(x['PTM Position in Isoform'])), axis = 1) #combine psp MS info with coordinate data, check to make sure size doesn't change original_shape = ptm_coordinates.shape[0] ptm_coordinates = ptm_coordinates.merge(psp_df[['PTM', 'Modification Class', 'MS_LIT', 'MS_CST', 'LT_LIT']], on = ['PTM', 'Modification Class'], how = 'left') if original_shape != ptm_coordinates.shape[0]: raise ValueError('Size of dataframe changed after merging PhosphoSitePlus data. Please check for duplicates.') #go through entries without info and make sure it is not due to isoform ID issues missing_db_info = ptm_coordinates[ptm_coordinates[['MS_LIT', 'MS_CST', 'LT_LIT']].isna().all(axis = 1)] ptm_coordinates = ptm_coordinates[~ptm_coordinates[['MS_LIT', 'MS_CST', 'LT_LIT']].isna().all(axis = 1)] for i, row in missing_db_info.iterrows(): mod = row['Modification Class'] sources = [s for s in row['Source of PTM'].split(';') if row['Isoform ID'] not in s] for s in sources: tmp_data = psp_df[(psp_df['PTM'] == s) & (psp_df['Modification Class'] == mod)] if tmp_data.shape[0] > 0: tmp_data = tmp_data.squeeze() missing_db_info.loc[i, 'MS_LIT'] = tmp_data['MS_LIT'] missing_db_info.loc[i, 'MS_CST'] = tmp_data['MS_CST'] missing_db_info.loc[i, 'LT_LIT'] = tmp_data['LT_LIT'] break ptm_coordinates = pd.concat([ptm_coordinates, missing_db_info]) return ptm_coordinates def construct_mod_conversion_dict(): """ Reformat modification conversion dataframe to a dictionary for easy conversion between modification site and modification class """ modification_conversion = pose_config.modification_conversion[['Modification', 'Modification Class']].set_index('Modification') modification_conversion = modification_conversion.replace('Dimethylation', 'Methylation') modification_conversion = modification_conversion.replace('Trimethylation', 'Methylation') modification_conversion = modification_conversion.squeeze().to_dict() return modification_conversion def extract_number_compendia(ps_data_file): """ Given a proteomescout data file, extract the number of different compendia that support a PTM site in a format that works with ptm_coordinates file. The following experiment IDs are used to check for database evidence: 1395: HPRD 1790: PhosphoSitePlus 1323: Phospho.ELM 1688,1803: UniProt 1344: O-GlycBase 1575: dbPTM Parameters: ------------ ps_data_file: str Path to proteomescout data file Returns: ------------ ps_data: pd.DataFrame DataFrame with PTM site, modification class, and number of compendia that support the PTM site """ #load proteomescout data ps_data = pd.read_csv(ps_data_file, sep = '\t') ps_data = ps_data[ps_data['species'] == 'homo sapiens'] ps_data['modifications'] = ps_data['modifications'].str.split(';') ps_data['evidence'] = ps_data['evidence'].str.split(';') ps_data = ps_data.explode(['modifications','evidence']) #extract site numbers and modification types into unique columns, then convert modification to broad class to match ptm coordinates dataframe ps_data['site'] = ps_data['modifications'].str.split('-').str[0] ps_data['Modification'] = ps_data['modifications'].apply(lambda x: '-'.join(x.split('-')[1:])) modification_conversion = construct_mod_conversion_dict() ps_data['Modification Class'] = ps_data['Modification'].map(modification_conversion) ps_data = ps_data.drop(columns = ['Modification']) #split evidence into a list, then check to see if which entries correspond to database rather than literature study ps_data['evidence'] = ps_data['evidence'].apply(lambda x: [i.strip(' ') for i in x.split(',')]) ps_data['database evidence'] = ps_data['evidence'].apply(lambda x: set(x).intersection({'1395', '1790', '1323','1688','1803','1344','1575'})) ps_data['experimental evidence'] = ps_data['evidence'].apply(lambda x: list(set(x).difference({'1395', '1790', '1323','1688','1803','1344','1575'}))) #add specific compendia with site compendia_dict = {'1395':'HPRD','1790':'PhosphoSitePlus', '1323':'Phospho.ELM', '1688':'UniProt', '1803':'UniProt', '1344':'O-GlycBase','1575':'dbPTM'} ps_data['Compendia'] = ps_data['database evidence'].apply(lambda x: ['ProteomeScout'] + [compendia_dict[i] for i in x]) #separate all accessions into separate rows for merge ps_data['accessions'] = ps_data['accessions'].str.split(';') ps_data = ps_data.explode('accessions') #convert isoform IDs from having . to a - for consistency ps_data['accessions'] = ps_data['accessions'].str.replace('.','-').str.strip(' ') #create PTM column ps_data['PTM'] = ps_data['accessions'] + '_' + ps_data['site'].str.strip(' ') ps_data = ps_data.dropna(subset = ['PTM']) #some ptms may have multiple entries (due to multiple similar modification types), so combine them ps_data = ps_data.groupby(['PTM', 'Modification Class'], as_index = False)[['Compendia', 'experimental evidence']].agg(sum) ps_data['Number of Compendia'] = ps_data['Compendia'].apply(lambda x: len(set(x))) ps_data['Compendia'] = ps_data['Compendia'].apply(lambda x: ';'.join(set(x))) ps_data['Number of Experiments'] = ps_data['experimental evidence'].apply(lambda x: len(set(x))) ps_data['experimental evidence'] = ps_data['experimental evidence'].apply(lambda x: ';'.join(set(x))) return ps_data def append_num_compendia(ptm_coordinates, pscout_data): """ Given a PTM coordinates dataframe and processed proteomescout data generated by `extract_num_compendia()`, append the number of compendia that support a PTM site to the PTM coordinates dataframe. Check all potential isoform IDs for compendia information. Parameters: ------------ ptm_coordinates: pd.DataFrame DataFrame with PTM site information pscout_data: pd.DataFrame DataFrame with PTM site, modification class, and number of compendia that support the PTM site """ #remove existing columns if present ptm_coordinates = ptm_coordinates.drop(columns = [i for i in ['Compendia', 'Number of Compendia'] if i in ptm_coordinates.columns]) if 'PTM' not in ptm_coordinates.columns: ptm_coordinates['PTM'] = ptm_coordinates.apply(lambda x: x['Isoform ID'] + '_'+ x['Residue'] + str(int(x['PTM Position in Isoform'])) if x['Isoform Type'] == 'Alternative' else x['UniProtKB Accession'] + '_'+ x['Residue'] + str(int(x['PTM Position in Isoform'])), axis = 1) #merge the data ptm_coordinates = ptm_coordinates.merge(pscout_data, on = ['Modification Class', 'PTM'], how = 'left') #go through any missing entries and see if they can be filled in with other isoform IDs missing_db_info = ptm_coordinates[ptm_coordinates['Number of Compendia'].isna()] ptm_coordinates = ptm_coordinates[~ptm_coordinates['Compendia'].isna()] for i, row in tqdm(missing_db_info.iterrows(), total = missing_db_info.shape[0], desc = 'Going through missing compendia data, making sure not due to isoform ID issues'): mod = row['Modification Class'] sources = [s for s in row['Source of PTM'].split(';') if row['Isoform ID'] not in s] for s in sources: tmp_data = pscout_data[(pscout_data['PTM'] == s) & (pscout_data['Modification Class'] == mod)] if tmp_data.shape[0] > 0: tmp_data = tmp_data.squeeze() missing_db_info.loc[i, 'Compendia'] = tmp_data['Compendia'] missing_db_info.loc[i, 'Number of Compendia'] = tmp_data['Number of Compendia'] break ptm_coordinates = pd.concat([ptm_coordinates, missing_db_info]) #fill in remainder entries as PhosphoSitePlus ptm_coordinates['Compendia'] = ptm_coordinates['Compendia'].fillna('PhosphoSitePlus') ptm_coordinates['Number of Compendia'] = ptm_coordinates['Number of Compendia'].fillna(1) return ptm_coordinates def separate_by_modification(ptm_coordinates, mod_mapper): #separate out modification class (important for functional analysis) ptm_coordinates['Modification Class'] = ptm_coordinates['Modification Class'].str.split(';') ptm_coordinates = ptm_coordinates.explode('Modification Class') #reduce modification columns to only those relevant to modification class mod_mapper = mod_mapper.squeeze() mod_mapper = mod_mapper.str.split(';') mod_mapper = mod_mapper.to_dict() def filter_mods(x, mod_class): """ Go through modification list and only keep those that are relevant to the modification class (removes mismatch due to exploded dataframe) """ x_list = x.split(';') return ';'.join([i for i in x_list if i in mod_mapper[mod_class]]) ptm_coordinates['Modification'] = ptm_coordinates.apply(lambda x: filter_mods(x['Modification'], x['Modification Class']), axis = 1) return ptm_coordinates def generate_ptm_coordinates(pscout_data_file, phosphositeplus_filepath, mod_mapper, remap_PTMs = False, output_dir = None, ptm_info = None): mapper = mapping.PTM_mapper() if remap_PTMs: mapper.find_ptms_all(phosphositeplus_file = phosphositeplus_filepath) mapper.mapPTMs_all() #get coordinate info if mapper.ptm_coordinates is None: raise ValueError('No PTM coordinates found. Please set remap_PTMs to True to generate PTM coordinates. If you want to include PhosphoSitePlus data, please also include location of phosphositeplus data file in phosphositeplus_filepath argument.') #copy ptm_coordinates file to be edited for easier use with PTM-POSE ptm_coordinates = mapper.ptm_coordinates.copy() ptm_coordinates = get_unique_ptm_info(ptm_coordinates) # convert coordinates to hg19 and hg38, then from hg19 to hg18 ptm_coordinates['Gene Location (hg19)'] = convert_coordinates(ptm_coordinates, from_coord = 'hg38', to_coord = 'hg19') ptm_coordinates['Gene Location (hg18)'] = convert_coordinates(ptm_coordinates, from_coord = 'hg19', to_coord = 'hg18') #separate out PTMs that are found in UniProt accessions ptm_coordinates['PTM Position in Isoform'] = ptm_coordinates['PTM Position in Isoform'].str.split(';') ptm_coordinates['UniProtKB Accession'] = ptm_coordinates['UniProtKB Accession'].str.split(';') ptm_coordinates['Isoform ID'] = ptm_coordinates['Isoform ID'].str.split(';') ptm_coordinates = ptm_coordinates.explode(['UniProtKB Accession', 'Isoform ID', 'PTM Position in Isoform']).reset_index() #for further analysis ptm_coordinates['PTM'] = ptm_coordinates['Isoform ID'] + '_' + ptm_coordinates['Residue'] + ptm_coordinates['PTM Position in Isoform'] if ptm_info is None: ptm_info = mapper.ptm_info.copy() #add whether or not the PTM is constitutive if 'PTM Conservation Score' not in ptm_info.columns: raise Warning('No PTM conservation score found in ptm_info object. Will not add column indicating whether the PTM is considered to be constitutive or not.') else: #grab nonconstitutive list from ptm_info object nonconstitutive_list = set(ptm_info[ptm_info['PTM Conservation Score'] != 1].index.values) const_list = [] for i, row in tqdm(ptm_coordinates.iterrows(), total = ptm_coordinates.shape[0], desc = 'Checking for constitutive PTMs'): const_list.append(check_constitutive(row['PTM'], nonconstitutive_list)) ptm_coordinates['Constitutive'] = const_list #add flanking sequence information ptm_coordinates['Canonical Flanking Sequence'] = ptm_coordinates['PTM'].apply(lambda x: ptm_info.loc[x, 'Flanking Sequence'] if x == x else np.nan) #drop unnecessary columns and save #ptm_coordinates = ptm_coordinates.drop(columns = ['PTM Position in Canonical Isoform', 'PTM'], axis = 1) ptm_coordinates = ptm_coordinates[['Gene name', 'UniProtKB Accession', 'Isoform ID', 'Isoform Type', 'Residue', 'PTM Position in Isoform', 'Modification', 'Modification Class', 'Chromosome/scaffold name', 'Strand', 'Gene Location (hg38)', 'Gene Location (hg19)', 'Gene Location (hg18)', 'Constitutive', 'Canonical Flanking Sequence', 'Source of PTM']] ptm_coordinates = ptm_coordinates.reset_index() #there will be some non-unique genomic coordinates, so reset index to ensure unique index values #separate out modification class (important for functional analysis) ptm_coordinates = separate_by_modification(ptm_coordinates, mod_mapper) #add filtering criteria ## add number of compendia pscout_data = extract_number_compendia(pscout_data_file) ptm_coordinates = append_num_compendia(ptm_coordinates, pscout_data) ## number of MS experiments (from PhosphoSitePlus, fill in any remainder with ProteomeScout experiments) psp_df = generate_ptm_coordinates.extract_num_studies(phosphositeplus_filepath, pscout_data=pscout_data) ptm_coordinates = generate_ptm_coordinates.append_num_studies(ptm_coordinates, psp_df) #do some final custom editing for outlier examples missing_db_info = ptm_coordinates[ptm_coordinates[['MS_LIT', 'MS_CST', 'LT_LIT', 'Compendia']].isna().all(axis = 1)] ptm_coordinates = ptm_coordinates[~ptm_coordinates[['MS_LIT', 'MS_CST', 'LT_LIT', 'Compendia']].isna().all(axis = 1)] #custom fixes ms_lit = {'Q13765_K108':1, 'Q13765_K100':2} ms_cst = {'Q13765_K108':np.nan, 'Q13765_K100':np.nan} lt_lit = {'Q13765_K108':np.nan, 'Q13765_K100':np.nan} custom_compendia = {'Q13765_K108':'PhosphoSitePlus;ProteomeScout', 'Q13765_K100': 'PhosphoSitePlus;ProteomeScout'} custom_num_compendia = {'Q13765_K108':2, 'Q13765_K100':2} for i,row in missing_db_info.iterrows(): ptm = row['PTM'] missing_db_info.loc[i, 'MS_LIT'] = ms_lit[ptm] missing_db_info.loc[i, 'MS_CST'] = ms_cst[ptm] missing_db_info.loc[i, 'LT_LIT'] = lt_lit[ptm] missing_db_info.loc[i, 'Compendia'] = custom_compendia[ptm] missing_db_info.loc[i, 'Number of Compendia'] = custom_num_compendia[ptm] ptm_coordinates = pd.concat([ptm_coordinates, missing_db_info]) ptm_coordinates[['Number of Compendia', 'Number of Experiments', 'MS_LIT', 'MS_CST', 'LT_LIT']] = ptm_coordinates[['Number of Compendia', 'Number of Experiments', 'MS_LIT', 'MS_CST', 'LT_LIT']].fillna(0).astype(int) if output_dir is not None: ptm_coordinates.to_csv(output_dir + 'ptm_coordinates.csv', index = False) #write to text file indicating when the data was last updated with open(output_dir + 'last_updated.txt', 'w') as f: f.write(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) return ptm_coordinates
最新发布
07-08
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值