Source code for mgkit.net.pfam

"""
.. versionadded:: 0.2.3

This module defines routine to access Pfam information using a
network connection

"""
from . import url_open

PFAM_URL = "http://pfam.xfam.org/"


[docs]def get_pfam_families(key='id'): """ .. versionadded:: 0.2.3 Gets a dictionary with the accession/id/description of Pfam families from Pfam. This list can be accessed using the URL: http://pfam.xfam.org/families?output=text The output is a tab separated file where the fields are: * ACCESSION * ID * DESCRIPTION Arguments: key (str): if the value is *id*, the key of the dictionary is the ID, otherwise ID swaps position with ACCESSION (the new key) Returns: dict: by default the function returns a dictionary that uses the ID as key, while the value is a tuple (ACCESSION, DESCRIPTION). ID is the default because the :ref:`hmmer2gff` script output uses ID as *gene_id* value when using the HMM provided by Pfam """ families = {} for line in url_open(PFAM_URL + "families?output=text", stream=True): line = line.decode('utf8').strip() if line.startswith('#') or (not line): continue acc, p_id, description = line.strip().split('\t') if key != 'id': p_id, acc = acc, p_id families[p_id] = (acc, description) return families