o
    Rŀg|,                     @   s   d Z ddlZddlZddlmZ ddlmZ ddlmZ G dd dejZG dd	 d	e	Z
d
d Zdd Zdd Zdd Zdd Zdd Zdd ZdS )zJASPAR2014 module.    N)Align)motifs)Seqc                   @   sd   e Zd ZdZ													dddZedd Zedd	 Zd
d Zdd Z	dd Z
dS )Motifa  A subclass of Bio.motifs.Motif used to represent a JASPAR profile.

    Additional metadata information are stored if available. The metadata
    availability depends on the source of the JASPAR motif (a 'pfm' format
    file, a 'jaspar' format file or a JASPAR database).
    ACGTNc                 C   s^   t j| ||| || _|| _|| _|| _|| _|	| _|
| _	|| _
|| _|| _|| _|| _dS )z"Construct a JASPAR Motif instance.N)r   r   __init__name	matrix_id
collectiontf_class	tf_familyspecies	tax_groupacc	data_typemedlinepazar_idcomment)selfr	   r   alphabet	alignmentcountsr
   r   r   r   r   r   r   r   r   r    r   N/var/www/html/myenv/lib/python3.10/site-packages/Bio/motifs/jaspar/__init__.pyr      s   
zMotif.__init__c                 C   s   t | j\}}|S )z!Return the JASPAR base matrix ID.split_jaspar_idr	   )r   base_id__r   r   r   r   =      zMotif.base_idc                 C   s   t | j\}}|S )z!Return the JASPAR matrix version.r   )r   r   versionr   r   r   r   C   r   zMotif.versionc                 C   s  d| j  d}d| j d}d||g}| jr&d| j d}d||g}| jr7d| j d}d||g}| jrHd| j d}d||g}| jr\dd	| j d}d||g}| jrmd
| j d}d||g}| jr~d| j d}	d||	g}| j	rd| j	 d}
d||
g}| j
rd| j
 d}d||g}| jrd| j d}d||g}| jrd| j d}d||g}d| j d}d||g}|S )zReturn a string representation of the JASPAR profile.

        We choose to provide only the filled metadata information.
        zTF name	
z
Matrix ID	 zCollection	z	TF class	z
TF family	zSpecies	,zTaxonomic group	z
Accession	zData type used	zMedline	z	PAZAR ID	z	Comments	zMatrix:
z

)r   r	   joinr
   r   r   r   r   r   r   r   r   r   r   )r   tf_name_strmatrix_id_str
the_stringcollection_strtf_class_strtf_family_strspecies_strtax_group_stracc_strdata_type_strmedline_strpazar_id_strcomment_str
matrix_strr   r   r   __str__I   sH   zMotif.__str__c                 C   s
   | j  S )zvReturn the hash key corresponding to the JASPAR profile.

        :note: We assume the unicity of matrix IDs

        )r	   __hash__r   r   r   r   r3   s   s   
zMotif.__hash__c                 C   s   | j |j kS )z'Return True if matrix IDs are the same.r	   )r   otherr   r   r   __eq__{   s   zMotif.__eq__)r   NNNNNNNNNNNN)__name__
__module____qualname____doc__r   propertyr   r   r2   r3   r7   r   r   r   r   r      s.    
$

*r   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	RecordzaRepresent a list of jaspar motifs.

    Attributes:
     - version: The JASPAR version used

    c                 C   s
   d| _ dS )zInitialize the class.N)r   r4   r   r   r   r      s   
zRecord.__init__c                 C   s   d dd | D S )z,Return a string of all motifs in the Record.r    c                 s   s    | ]}t |V  qd S N)str).0	the_motifr   r   r   	<genexpr>   s    z!Record.__str__.<locals>.<genexpr>)r#   r4   r   r   r   r2      s   zRecord.__str__c                 C   s   i }| D ]}|||j < q|S )z8Return the list of matrices as a dictionary of matrices.r5   )r   dicmotifr   r   r   to_dict   s   zRecord.to_dictN)r8   r9   r:   r;   r   r2   rE   r   r   r   r   r=      s
    r=   c                 C   sP   |  }|dkrt| }|S |dkrt| }|S |dkr"t| }|S td| )zRead motif(s) from a file in one of several different JASPAR formats.

    Return the record of PFM(s).
    Call the appropriate routine based on the format passed.
    pfmsitesjasparUnknown JASPAR format %s)lower	_read_pfm_read_sites_read_jaspar
ValueError)handleformatrecordr   r   r   read   s   rR   c              	   C   s  d}g }|dkr+| d }|j }|D ]}dd || D }d| d}|| qnQ|dkrv| D ]C}	|	j }z|	j}
W n tyF   d	}
Y nw d
|
 d|	j d}|| |D ]}dd || D }| dd| d}|| qXq1ntd| d|}|S )z@Return the representation of motifs in "pfm" or "jaspar" format.r   rF   r   c                 S      g | ]}|d qS z6.2fr   r@   valuer   r   r   
<listcomp>       zwrite.<locals>.<listcomp> r    rH   N>c                 S   rS   rT   r   rU   r   r   r   rW      rX   z [z]
rI   r!   )r   r#   appendr	   AttributeErrorr   rN   )r   rP   letterslinesrD   r   lettertermslinemr	   textr   r   r   write   s:   


rd   c                 C   s~   d}i }t || D ]\}}| }|d |kr|dd }dd |D ||< q	tdd||d}d|j |_t }|| |S )	z1Read the motif from a JASPAR .pfm file (PRIVATE).r   r      Nc                 S      g | ]}t |qS r   floatr@   xr   r   r   rW      rX   z_read_pfm.<locals>.<listcomp>)r	   r   r   r   *)zipsplitr   lengthmaskr=   r[   )rO   r   r   r_   ra   wordsrD   rQ   r   r   r   rK      s   
rK   c           	      C   s   d}g }| D ]'}| ds nt| }d}| D ]
}| r#||7 }qt|}|| qt|}tdd||d}d|j	 |_
t }|| |S )z1Read the motif from JASPAR .sites file (PRIVATE).r   rZ   r!   N)r	   r   r   r   rk   )
startswithnextstripisupperr   r[   r   	Alignmentr   rn   ro   r=   )	rO   r   	instancesra   instancecr   rD   rQ   r   r   r   rL      s&   


rL   c              	   C   sb  d}i }t  }td}td}td}d}d}d}	g d}
| D ]}| }||}||}||}|rL|d}|d	rI|d	}q"|}q"|r|dd
\}}| }dd |D ||< |	d7 }	|	dkr~|t||||d d}d}i }d}	q"|r|d }dd |D ||
|	 < |	d7 }	|	dkr|t||||d d}d}i }d}	q"|S )at  Read motifs from a JASPAR formatted file (PRIVATE).

    Format is one or more records of the form, e.g.::

      - JASPAR 2010 matrix_only format::

                >MA0001.1 AGL3
                A  [ 0  3 79 40 66 48 65 11 65  0 ]
                C  [94 75  4  3  1  2  5  2  3  3 ]
                G  [ 1  0  3  4  1  0  5  3 28 88 ]
                T  [ 2 19 11 50 29 47 22 81  1  6 ]

      - JASPAR 2010-2014 PFMs format::

                >MA0001.1 AGL3
                0	3	79	40	66	48	65	11	65	0
                94	75	4	3	1	2	5	2	3	3
                1	0	3	4	1	0	5	3	28	88
                2	19	11	50	29	47	22	81	1	6

    r   z^>\s*(\S+)(\s+(\S+))?z\s*([ACGT])\s*\[\s*(.*)\s*\]z
\s*(.+)\s*Nr   )ACGTre         c                 S   rf   r   rg   ri   r   r   r   rW   /  rX   z _read_jaspar.<locals>.<listcomp>   )r   r   c                 S   rf   r   rg   ri   r   r   r   rW   9  rX   )	r=   recompilers   matchgrouprm   r[   r   )rO   r   r   rQ   head_patrow_pat_longrow_pat_short
identifierr   	row_countnucleotidesra   
head_matchrow_match_longrow_match_shortr_   
counts_strrp   r   r   r   rM      sX   







rM   c                    s   j }j}d}tjD ] |t fdd|D 7 }q|j }t|}|r/t|}ntt	|d}t|
 }i }|D ]}||  |  < |||  ||< qA|S )zCalculate pseudocounts.

    Computes the root square of the total number of sequences multiplied by
    the background nucleotide.
    r   c                 3   s    | ]
}j |   V  qd S r>   )r   )r@   r_   irD   r   r   rB   R  s    z)calculate_pseudocounts.<locals>.<genexpr>g      ?)r   
backgroundrangern   summathsqrtdictfromkeyssortedvalues)rD   r   r   totalavg_nb_instancessq_nb_instancespseudocountsr_   r   r   r   calculate_pseudocountsE  s    


r   c                 C   sB   |  d}d}d}t|dkr|d }|d }||fS | }||fS )zSplit a JASPAR matrix ID into its component.

    Components are base ID and version number, e.g. 'MA0047.2' is returned as
    ('MA0047', 2).
    .Nr~   r   re   )rm   len)idid_splitr   r   r   r   r   r   f  s   
r   )r;   r   r   Bior   r   Bio.Seqr   r   listr=   rR   rd   rK   rL   rM   r   r   r   r   r   r   <module>   s   o!I!