o
    Rŀg,                     @   s.   d Z dd Zdd ZG dd dZdd Zd	S )
a~  Parser for the prosite dat file from Prosite at ExPASy.

See https://www.expasy.org/prosite/

Tested with:
 - Release 20.43, 10-Feb-2009
 - Release 2017_03 of 15-Mar-2017.

Functions:
 - read                  Reads a Prosite file containing one Prosite record
 - parse                 Iterates over records in a Prosite file.

Classes:
 - Record                Holds Prosite data.

c                 c   s    	 t | }|s
dS |V  q)zParse Prosite records.

    This function is for parsing Prosite files containing multiple
    records.

    Arguments:
     - handle   - handle to the file.

    TN)__read)handlerecord r   F/var/www/html/myenv/lib/python3.10/site-packages/Bio/ExPASy/Prosite.pyparse   s   
r   c                 C   s    t | }|  }|rtd|S )zRead one Prosite record.

    This function is for parsing Prosite files containing
    exactly one record.

    Arguments:
     - handle   - handle to the file.

    z"More than one Prosite record found)r   read
ValueError)r   r   	remainderr   r   r   r   +   s
   
r   c                   @   s   e Zd ZdZdd ZdS )Recordag  Holds information from a Prosite record.

    Main attributes:
     - name           ID of the record.  e.g. ADH_ZINC
     - type           Type of entry.  e.g. PATTERN, MATRIX, or RULE
     - accession      e.g. PS00387
     - created        Date the entry was created.  (MMM-YYYY for releases
       before January 2017, DD-MMM-YYYY since January 2017)
     - data_update    Date the 'primary' data was last updated.
     - info_update    Date data other than 'primary' data was last updated.
     - pdoc           ID of the PROSITE DOCumentation.
     - description    Free-format description.
     - pattern        The PROSITE pattern.  See docs.
     - matrix         List of strings that describes a matrix entry.
     - rules          List of rule definitions (from RU lines).  (strings)
     - prorules       List of prorules (from PR lines). (strings)

    NUMERICAL RESULTS:
     - nr_sp_release  SwissProt release.
     - nr_sp_seqs     Number of seqs in that release of Swiss-Prot. (int)
     - nr_total       Number of hits in Swiss-Prot.  tuple of (hits, seqs)
     - nr_positive    True positives.  tuple of (hits, seqs)
     - nr_unknown     Could be positives.  tuple of (hits, seqs)
     - nr_false_pos   False positives.  tuple of (hits, seqs)
     - nr_false_neg   False negatives.  (int)
     - nr_partial     False negatives, because they are fragments. (int)

    COMMENTS:
     - cc_taxo_range  Taxonomic range.  See docs for format
     - cc_max_repeat  Maximum number of repetitions in a protein
     - cc_site        Interesting site.  list of tuples (pattern pos, desc.)
     - cc_skip_flag   Can this entry be ignored?
     - cc_matrix_type
     - cc_scaling_db
     - cc_author
     - cc_ft_key
     - cc_ft_desc
     - cc_version     version number (introduced in release 19.0)

    The following are all lists if tuples (swiss-prot accession, swiss-prot name).

    DATA BANK REFERENCES:
     - dr_positive
     - dr_false_neg
     - dr_false_pos
     - dr_potential   Potential hits, but fingerprint region not yet available.
     - dr_unknown     Could possibly belong
     - pdb_structs    List of PDB entries.

    c                 C   s   d| _ d| _d| _d| _d| _d| _d| _d| _d| _g | _	g | _
g | _g | _d| _d| _d| _d| _d| _d| _d| _d| _d| _d| _g | _d| _g | _g | _g | _g | _g | _g | _dS )zInitialize the class. )NNN)nametype	accessioncreateddata_updateinfo_updatepdocdescriptionpatternmatrixrulesprorulespostprocessingnr_sp_release
nr_sp_seqsnr_totalnr_positive
nr_unknownnr_false_posnr_false_neg
nr_partialcc_taxo_rangecc_max_repeatcc_sitecc_skip_flagdr_positivedr_false_negdr_false_posdr_potential
dr_unknownpdb_structs)selfr   r   r   __init__q   s>   
zRecord.__init__N)__name__
__module____qualname____doc__r,   r   r   r   r   r
   =   s    3r
   c                 C   s  dd l }d }| D ]}|d d |dd   }}|dkrAt }|d}t|dkr3td| |d |_|d d|_q|d	krL|d
|_q|dkr|dd}|d 	drk|d 
ddd |_ntd| |d 	dr|d 
ddd |_ntd| |d 	dr|d 
ddd |_qtd| |dkr||_q|dkr| j|7  _q|dkr|j| q|dkr|j|d
 q|dkr|j| q|dkrv|d
}|D ]}|sqdd |dD \}	}
|	dkr|
d\}}||_t||_q|	dkrt|
|_q|	dkr&t|
|_q|	dv rk|d|
}|s>td |
 d!|ttt| }|	d"krP||_q|	d#krY||_ q|	d$krb||_!q|	d%krj||_"qtd&|	 d!|q|d'kr%|d
}|D ]}|r|d d( d)krq|#ddkrqd*d |dD \}	}
|	d+kr|
|_$q|	d,kr|
|_%q|	d-kr|
d\}}|j&t||f q|	d.kr|
|_'q|	d/kr|
|_(q|	d0kr|
|_)q|	d1kr|
|_*q|	d2kr|
|_+q|	d3kr|
|_,q|	d4kr|
|_-qtd&|	 d!|q|d5kr|d
}|D ]e}|s8q1d6d |dD \}}}|d7krT|j.||f q1|d8krc|j/||f q1|d9krr|j0||f q1|d:kr|j1||f q1|d;kr|j2||f q1td<| q|d=kr| }|D ]}|j3|d
 qq|d>kr|d
}|j4| q|d?kr|d
|_5q|d@kr|sq n
tdA| dBd S |stdC|S )DN          IDz; z'I don't understand identification line
   .AC;DT)z
 (CREATED)z CREATED zI don't understand date line
)z (DATA UPDATE)z DATA UPDATE)z (INFO UPDATE)z INFO UPDATEDEPAMAPPRUNRc                 s       | ]}|  V  qd S Nlstrip.0wordr   r   r   	<genexpr>       z__read.<locals>.<genexpr>=z/RELEASE,z
/FALSE_NEGz/PARTIAL)/TOTAL	/POSITIVE/UNKNOWN
/FALSE_POSz(\d+)\((\d+)\)zBroken data z in comment line
rL   rM   rN   rO   zUnknown qual CC   zAutomatic scalingc                 s   rA   rB   rC   rE   r   r   r   rH      rI   z/TAXO-RANGEz/MAX-REPEATz/SITEz
/SKIP-FLAGz/MATRIX_TYPEz/SCALING_DBz/AUTHORz/FT_KEYz/FT_DESCz/VERSIONDRc                 s   rA   rB   )striprE   r   r   r   rH     rI   TFNP?zI don't understand type flag 3DPRDOz//zUnknown keyword z foundzUnexpected end of stream.)6rerstripr
   splitlenr   r   r   r   endswithrsplitr   r   r   r   r   r   appendr   extendr   r   intr   r   r    match	Exceptiontuplemapgroupsr   r   r   r   countr!   r"   r#   r$   cc_matrix_typecc_scaling_db	cc_author	cc_ft_key
cc_ft_desc
cc_versionr%   r'   r&   r(   r)   r*   r   r   )r   r\   r   linekeywordvaluecolsdatescolqualdatareleaseseqsmhitsposdescrefsrefaccr   r   idr   r   r   r   r      s  


































#










r   N)r0   r   r   r
   r   r   r   r   r   <module>   s
   _