Source code for pmaf.remote._entrez._entrez

from ._base import EntrezBase
from typing import Optional, Union

# TODO: This class was coded during early PhyloMAF development days, therefore it should be completely revised.


class Entrez(EntrezBase):
    """Remote class responsible for batch data fetching from :term:`NCBI`'s
    Entrez :term:`API` :cite:t:`schuler10EntrezMolecular1996`

    Every public ``get_*`` method delegates the actual request and parsing to
    helpers referenced on ``self`` (defined outside this class body) and
    returns ``False`` when the request yields nothing usable.
    """

    def __init__(
        self, email: str, api_key: Optional[str] = None, tool: str = "PhyloMAF"
    ):
        """Create Entrez instance to connect to :term:`NCBI` database.

        Parameters
        ----------
        email
            User's email that will be passed to :term:`NCBI` server
        api_key
            User's :term:`API` key obtained from :term:`NCBI` account page.
            Default is None (without key). However, it is recommended to
            obtain an :term:`API` key for faster access.
        tool
            Label of the tool that will be passed to :term:`NCBI` server.
            Default is "PhyloMAF"
        """
        super().__init__(email, api_key, tool)

    def get_taxid_by_query(self, query: str) -> Union[bool, str]:
        """Fetch :term:`NCBI` taxonomy database using `query` and get a
        :term:`NCBI`'s internal :term:`taxid`.

        Parameters
        ----------
        query
            Query string to search for

        Returns
        -------
            False if the request returned nothing, otherwise the
            :term:`taxid` extracted from the response.
        """
        raw_data = self._request_taxid_by_query(query)
        if not raw_data:
            return False
        return self._get_top_taxid(raw_data)

    def get_genome_id_by_taxid(self, taxid: str) -> Union[bool, str]:
        """Get :term:`NCBI`'s genome ID by target :term:`taxid`

        Parameters
        ----------
        taxid
            Target :term:`taxid` to look for

        Returns
        -------
            False if the request returned nothing, otherwise the genome ID
            extracted from the response.
        """
        raw_data = self._request_genomes_by_taxid(taxid)
        if not raw_data:
            return False
        return self._get_top_genome_id(raw_data)

    def get_chromosome_id_by_genome_id(self, genome_id: str) -> Union[bool, str]:
        """Get :term:`NCBI`'s chromosome ID by target `genome_id`

        Parameters
        ----------
        genome_id
            Target genome ID to look for

        Returns
        -------
            False if the request returned nothing, otherwise the chromosome
            ID extracted from the response.
        """
        raw_data = self._request_chromosomes_by_genome_id(genome_id)
        if not raw_data:
            return False
        return self._get_top_chromosome_id(raw_data)

    def get_gene_features_by_chromosome_id(
        self, chromosome_id: str, gene: str = "rRNA"
    ) -> Union[bool, list]:
        """Get gene features by chromosome ID.

        Parameters
        ----------
        chromosome_id
            Target :term:`NCBI` chromosome ID
        gene
            Target gene type. Default is "rRNA".
            Disclaimer: method was not tested for other gene types.

        Returns
        -------
            False if the request returned nothing, otherwise the gene
            features extracted from :term:`NCBI`'s feature table.
            NOTE(review): the exact container type is decided by
            `filter_genes_from_future_table`; annotated as list - confirm.
        """
        # The second positional argument is forwarded unchanged; its meaning
        # is defined by the request helper outside this class body.
        raw_io = self._request_sequence_feature_table_by_acc_id(chromosome_id, True)
        if not raw_io:
            return False
        try:
            # Helper name is spelled "future" where it is defined; keep it
            # as-is so the call still resolves.
            return self.filter_genes_from_future_table(raw_io, gene)
        finally:
            # Close the handle even if filtering raises, so it never leaks.
            raw_io.close()

    def get_fasta_sequence_by_param(
        self,
        accession_id: str,
        start_pos: Union[str, int],
        stop_pos: Union[str, int],
        strand: Union[str, int],
    ) -> Union[bool, str]:
        """Get FASTA string from :term:`NCBI` database by `accession_id`

        Parameters
        ----------
        accession_id
            Accession number of the "Nucleotide" database of :term:`NCBI`.
        start_pos
            Start position of sequence to crop
        stop_pos
            Stop position of the sequence to crop
        strand
            Strand of the sequence. Can be either "1" or "2" (the integers
            1 and 2 are accepted as well). For more details check
            :term:`NCBI` documentation.

        Returns
        -------
            False if `strand` is not one of the accepted values, otherwise
            the result of the FASTA request for the given region.
        """
        # Normalise to text so that the integer forms allowed by the type
        # annotation are not silently rejected by the membership test.
        strand_code = str(strand)
        if strand_code not in ("1", "2"):
            return False
        return self._request_sequence_fasta(
            accession_id, start_pos, stop_pos, strand_code
        )