/**************************************************************************** ** ** @source ajpdbio.c ** ** Data structures and functions for reading and writing PDB format files. ** Includes functions for writing a Pdb object (defined in ajpdb.h). ** ** Copyright (c) 2004 Jon Ison ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the GNU General Public License ** as published by the Free Software Foundation; either version 2 ** of the License, or (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with this program; if not, write to the Free Software ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ** ** Notes ** Look for JONNEW for latest edits. ** ****************************************************************************/ /* ======================================================================= */ /* ============================ include files ============================ */ /* ======================================================================= */ #include "ajax.h" /* ======================================================================= */ /* ============================ private data ============================= */ /* ======================================================================= */ /* @datastatic AjPElement ***************************************************** ** ** Nucleus Element object. ** ** Object for holding a single secondary structure element when parsing PDB. ** ** AjPElement is implemented as a pointer to a C data structure. ** ** ** ** @alias AjSElement ** @alias AjOElement ** ** ** ** @attr elementId [AjPStr] Element identifier (columns 12 - 14) ** SHEET ('E') or TURN ('T') ** @attr initResName [AjPStr] Name of first residue in each element (columns ** 16 - 18 (HELIX & TURN) or 18 - 20 (SHEET) ) ** @attr initSeqNum [AjPStr] Residue number (including insertion code) ** of first residue in each element ** (columns 22 - 26 (HELIX), 23 - 27 (SHEET) ** or 21 - 25 (TURN) ) ** @attr endResName [AjPStr] Name of last residue in each element ** (columns 28 - 30 (HELIX), 29 - 31 (SHEET) ** or 27 - 29 (TURN) ) ** @attr endSeqNum [AjPStr] Residue number (including insertion code) of ** last residue in each element ** (columns 34 - 38 (HELIX and SHEET) ** or 32 - 36 (TURN) ) ** @attr helixClass [ajint] Classes of helices (columns 39 - 40), ** an int from 1-10 from ** @attr elementNum [ajint] Serial number of the element (columns 8 - 10) ** @attr elementType [char] Element type COIL ('C'), HELIX ('H'), ** @attr chainId [char] Chain identifiers for chains containing the ** elements (column 20 (HELIX & TURN) ** or 22 (SHEET) ) ** @attr Padding [char[6]] Padding to alignment boundary ** ** http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html
** ** TYPE OF HELIX CLASS NUMBER (COLUMNS 39 - 40)
** --------------------------------------------------------------
** Right-handed alpha (default) 1
** Right-handed omega 2
** Right-handed pi 3
** Right-handed gamma 4
** Right-handed 310 5
** Left-handed alpha 6
** Left-handed omega 7
** Left-handed gamma 8
** 27 ribbon/helix 9
** Polyproline 10
** ** @@ ****************************************************************************/ typedef struct AjSElement { AjPStr elementId; AjPStr initResName; AjPStr initSeqNum; AjPStr endResName; AjPStr endSeqNum; ajint helixClass; ajint elementNum; char elementType; char chainId; char Padding[6]; }AjOElement; #define AjPElement AjOElement* /* @datastatic AjPElements **************************************************** ** ** Nucleus Elements object. ** ** Object for holding secondary structure elements from a PDB file ** ** AjPElements is implemented as a pointer to a C data structure. ** ** ** ** @alias AjSElements ** @alias AjOElements ** ** ** ** @attr elms [AjPElement*] Secondary structure element array ** @attr n [ajint] Total no. of secondary structure elements ** (helices, strands or turns) ** @attr Padding [char[4]] Padding to alignment boundary ** @@ ****************************************************************************/ typedef struct AjSElements { AjPElement *elms; ajint n; char Padding[4]; } AjOElements; #define AjPElements AjOElements* /* @datastatic AjPPdbfile ***************************************************** ** ** Nucleus Pdbfile object. ** ** Holds a pdb file for parsing. ** ** AjPPdbfile is implemented as a pointer to a C data structure. ** ** ** ** @alias AjSPdbfile ** @alias AjOPdbfile ** ** ** ** @attr pdbid [AjPStr] 4 character pdb id code ** @attr tercnt [ajint] The number of TER records in the pdb file ** @attr toofewter [AjBool] True if the file contained too few TER records ** @attr modcnt [ajint] The number of MODEL records in the pdb file ** (does not count duplicate MODEL records ** that are masked out) ** @attr nomod [AjBool] True if the file contained no MODEL records ** @attr compnd [AjPStr] Text from COMPND records ** @attr source [AjPStr] Text from SOURCE records ** @attr reso [float] Resolution of structure ** @attr method [ajint] Structural method, either ajXRAY or ajNMR ** @attr gpid [AjPChar] Array of chain (group) id's for groups that ** cannot be associated with a chain in the SEQRES ** section ** @attr idxfirst [ajint] Index in of first ATOM, HETATM or MODEL ** line ** ** @attr nchains [ajint] Number of chains (from SEQRES record) ** for sizes of following attribute arrays ** @attr seqres [AjPStr*] Array of sequences taken from the SEQRES records ** @attr seqresful [AjPStr*] Array of sequences using 3 letter codes taken ** from the SEQRES records ** @attr nres [ajint*] Number of residues in each chain ** @attr chainok [AjBool*] Array of flags which are True if a chain in the ** SEQRES record contains >= minimum no. of amino ** acids and has a unique chain identifier ** @attr resn1ok [AjBool*] Bool's for each chain which are TRUE if resn1 ** was used to derive resni, i.e. gave correct ** alignment t seqres sequence. ** If False then resn2 was used. ** @attr nligands [ajint*] Number of ligands for each chain. A ligand is a ** non-protein group associated with a chain ** in the SEQRES section. ** @attr numHelices [ajint*] No. of helices in each chain ** @attr numStrands [ajint*] No. of strands in each chain ** @attr numSheets [ajint*] No. of sheets in each chain ** @attr numTurns [ajint*] No. of turns in each chain ** @attr chid [AjPChar] Array of chain id's for chains from SEQRES ** records ** ** @attr lines [AjPStr*] Array of lines in the pdb file ** @attr linetype [ajint*] Array of int's describing the lines, have values ** of PDBPARSE_IGNORE (do not consider this line ** when parsing coordinates from the file), ** PDBPARSE_COORD (coordinate line (ATOM or HETATM ** record) for protein atoms, PDBPARSE_COORDHET ** (coordinate line for non-protein atoms), ** PDBPARSE_COORDGP (oordinate line for groups ** that could not be associated with a SEQRES chain), PDBPARSE_COORDWAT (oordinate line for ** water), PDBPARSE_TER (it is a TER record) or ** PDBPARSE_MODEL (it is a MODEL record). ** @attr chnn [ajint*] Array of chain numbers for each PDBPARSE_COORD & ** PDBPARSE_COORDHET line. ** @attr gpn [ajint*] Array of group numbers for each line. Each group ** (heterogen) is given a group number, that is ** either relative to a chain or the whole file ** (for groups that could not be associated with ** a chain from the SEQRES records) ** @attr modn [ajint*] Array of model numbers for each PDBPARSE_COORD ** line ** ** @attr resni [ajint*] Residue numbers for each PDBPARSE_COORD line. ** These give the correct index into the 'seqres' ** sequences ** @attr resn1 [ajint*] Array of residue numbers for each PDBPARSE_COORD ** line. This is pdbn converted to a sequential ** integer where alternative residue numbering is ** presumed for lines where line[26] is used ** (residues for which oddnum==True are considered). ** @attr resn2 [ajint*] Array of residue numbers for each PDBPARSE_COORD ** line. This is pdbn converted to a sequential ** integer where heterogeneity is presumed for ** lines where line[26] is used (residues where ** oddnum==True are ignored). ** @attr pdbn [AjPStr*] Array with a residue number for each line ** for which == ajTrue. This is the ** original residue number string (including ** insertion code) from the pdb file ** @attr oddnum [AjBool*] Bool's for each line which are TRUE for ** duplicate residues of heterogenous positions ** (e.g. if 2 different residues are both numbered ** '8' or one is '8' and the other '8A' ** for example then would be set True for ** the second residue. Heterogeneity is indicated ** by a character in position lines[26] (the same ** position used to indicate alternative residue ** numbering schemes). ** @attr atype [AjPStr*] Atom type for each line ** @attr rtype [AjPStr*] Residue type for each line ** @attr x [float*] x-coordinate for each line ** @attr y [float*] y-coordinate for each line ** @attr z [float*] z-coordinate for each line ** @attr o [float*] occupancy for each line ** @attr b [float*] thermal factor for each line ** ** @attr elementNum [ajint*] Serial number of the secondary structure element ** (columns 8 - 10) ** @attr elementId [AjPStr*] Secondary structure element identifier (columns ** 12 - 14) ** @attr elementType [char*] Secondary structure element type COIL ('C'), ** HELIX ('H'), SHEET ('E') or TURN ('T') ** @attr helixClass [ajint*] Classes of helices (columns 39 - 40) from ** http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html ** (see below). Has a value of 0 (printed out as ** '.') for non-helical elements. ** @attr nlines [ajint] Number of lines in the pdb file and size of the ** following arrays ** @attr ngroups [ajint] Number of groups (non-protein groups that ** could not be associated with a chain in the ** SEQRES section ** @@ ******************************************************************************/ typedef struct AjSPdbfile { AjPStr pdbid; ajint tercnt; AjBool toofewter; ajint modcnt; AjBool nomod; AjPStr compnd; AjPStr source; float reso; ajint method; AjPChar gpid; ajint idxfirst; ajint nchains; AjPStr *seqres; AjPStr *seqresful; ajint *nres; AjBool *chainok; AjBool *resn1ok; ajint *nligands; ajint *numHelices; ajint *numStrands; ajint *numSheets; ajint *numTurns; AjPChar chid; AjPStr *lines; ajint *linetype; ajint *chnn; ajint *gpn; ajint *modn; ajint *resni; ajint *resn1; ajint *resn2; AjPStr *pdbn; AjBool *oddnum; AjPStr *atype; AjPStr *rtype; float *x; float *y; float *z; float *o; float *b; ajint *elementNum; AjPStr *elementId; char *elementType; ajint *helixClass; ajint nlines; ajint ngroups; }AjOPdbfile; #define AjPPdbfile AjOPdbfile* #define POS_CHID 21 /* Position in ATOM line of chain id. */ #define PDBPARSE_IGNORE 0 /* Ignore this line when parsing coordinates.*/ #define PDBPARSE_COORD 1 /* Coordinate line. */ #define PDBPARSE_COORDHET 2 /* Coordinate line for non-protein atoms. */ #define PDBPARSE_TER 3 /* TER record. */ #define PDBPARSE_MODEL 4 /* MODEL record. */ #define PDBPARSE_COORDGP 5 /* Coordinate line for groups that could not be associated with a SEQRES chain. */ #define PDBPARSE_COORDWAT 6 /* Coordinate line for water. */ #define PDBPARSE_ENDMDL 7 /* ENDMDL record. */ #define MAXMISSNTERM 3 /* A number of residues may be missing from the N-terminus of the SEQRES records relative to the ATOM records (e.g. MET and ACE often do not appear). The parser will search and correct for such cases. MAXMISSNTERM is the maximum number of such missing residues that can be accounted for. */ /* Flags for printing in PDB format */ enum flags { ajHEADER_DOMAIN = 1, ajSEQRES_DOMAIN, ajATOMPDB_DOMAIN, ajATOMIDX_DOMAIN, ajSEQRES_CHAIN, ajATOMPDB_CHAIN, ajATOMIDX_CHAIN, ajHETEROGEN, ajHEADER, ajTITLE, ajCOMPND, ajSOURCE, ajEMPTYREMARK, ajRESOLUTION }; /*THIS_DIAGNOSTIC */ AjPStr tempstr; AjPFile tempfile; /* ======================================================================= */ /* ================= Prototypes for private functions ==================== */ /* ======================================================================= */ /* These functions are for parsing PDB files */ /*THIS_DIAGNOSTIC static void diagnostic(AjPPdbfile *pdbfile, ajint n); */ static void PdbfileDel(AjPPdbfile *pthis); static AjPPdbfile ReadLines(AjPFile inf); static AjPPdbfile PdbfileNew(ajint nlines, ajint nchains); static AjBool FirstPass(AjPPdbfile pdbfile, AjPFile logf, AjPElements *elms, AjBool camask); static AjBool CheckChains(AjPPdbfile pdbfile, AjPFile logf, ajint min_chain_size); static AjBool SeqresToSequence(const AjPStr seqres, AjPStr *seq, AjBool camask, ajint *len); static AjBool CheckTer(AjPPdbfile pdbfile, AjPFile logf); static AjBool NumberChains(AjPPdbfile pdbfile, AjPFile logf); static AjBool NoMoreAtoms(AjPPdbfile pdbfile, ajint linen); static AjBool MaskChains(AjPPdbfile pdbfile, AjPFile logf, ajint min_chain_size, AjBool camask, AjBool camask1, AjBool atommask); static AjBool StandardiseNumbering(AjPPdbfile pdbfile, AjPFile logf); static AjBool AlignNumbering(AjPPdbfile pdbfile, AjPFile logf, ajuint lim, ajuint lim2); static AjBool PdbfileToPdb(AjPPdb *ret, AjPPdbfile pdb); static ajint PdbfileFindLine(const AjPPdbfile pdb, ajint chn, ajint which, ajint pos); /* Functions for Element object */ static AjPElements ElementsNew(ajint nchains); static void ElementsDel(AjPElements *ptr); static AjPElement ElementNew(void); static void ElementDel(AjPElement *ptr); static AjBool PdbfileChain(char id, const AjPPdbfile pdb, ajint *chn); static AjBool WriteElementData(AjPPdbfile pdbfile, AjPFile logf, const AjPElements elms); /* These functions are called by ajPdbWriteDomainRecordRaw */ static AjBool WriteHeaderScop(AjPFile outf, const AjPScop scop); static AjBool WriteSeqresDomain(AjPFile errf, AjPFile outf, const AjPPdb pdb, const AjPScop scop); static AjBool WriteAtomDomainPdb(AjPFile errf, AjPFile outf, const AjPPdb pdb, const AjPScop scop, ajint mod); static AjBool WriteAtomDomainIdx(AjPFile errf, AjPFile outf, const AjPPdb pdb, const AjPScop scop, ajint mod); static AjBool WriteAtomDomain(AjPFile errf, AjPFile outf, const AjPPdb pdb, const AjPScop scop, ajint mod, ajint mode); /* These functions are called by ajPdbWriteRecordRaw */ static AjBool WriteSeqresChain(AjPFile errf, AjPFile outf, const AjPPdb pdb, ajint chn); static AjBool WriteAtomChain(AjPFile outf, const AjPPdb pdb, ajint mod, ajint chn, ajint mode); static AjBool WriteHeterogen(AjPFile outf, const AjPPdb pdb, ajint mod); static AjBool WriteHeader(AjPFile outf, const AjPPdb pdb); static AjBool WriteTitle(AjPFile outf, const AjPPdb pdb); static AjBool WriteCompnd(AjPFile outf, const AjPPdb pdb); static AjBool WriteSource(AjPFile outf, const AjPPdb pdb); static AjBool WriteEmptyRemark(AjPFile outf, const AjPPdb pdb); static AjBool WriteResolution(AjPFile outf, const AjPPdb pdb); /* Others */ static AjBool WriteText(AjPFile outf, const AjPStr str, const char *prefix); /* ======================================================================= */ /* ========================== private functions ========================== */ /* ======================================================================= */ /* @funcstatic WriteSeqresChain ********************************************* ** ** Writes sequence for a protein chain to an output file in pdb format ** (SEQRES records). Sequence is taken from a Pdb structure. ** ** @param [w] errf [AjPFile] Output file stream for error messages ** @param [w] outf [AjPFile] Output file stream ** @param [r] pdb [const AjPPdb] Pdb object ** @param [r] chn [ajint] chain number, beginning at 1 ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ static AjBool WriteSeqresChain(AjPFile errf, AjPFile outf, const AjPPdb pdb, ajint chn) { ajint last_rn = 0; ajint this_rn; ajint x; ajint y; ajint len; const char *p; AjPStr tmp1 = NULL; AjPStr tmp2 = NULL; AjIList iter = NULL; AjPAtom atm = NULL; tmp1 = ajStrNew(); tmp2 = ajStrNew(); iter=ajListIterNewread(pdb->Chains[chn-1]->Atoms); /* Iterate through list of atoms */ while((atm=(AjPAtom)ajListIterGet(iter))) { /* ** Hard-coded to work on model 1 ** Break if a non-protein atom is found or model no. !=1 ** Continue / break if a non-protein atom is found or model no. !=1 */ if(atm->Mod!=1) break; if(atm->Type!='P') continue; /* If we are onto a new residue */ this_rn = atm->Idx; if(this_rn!=last_rn) { /* Assign sequence for residues missing from the linked list */ for(x=last_rn; xChains[chn-1]->Seq, x), &tmp2)) { ajWarn("Index out of range in WriteSeqresChain"); ajFmtPrintF(errf, "//\n%S\nERROR Index out " "of range in WriteSeqresChain\n", pdb->Pdb); ajStrDel(&tmp1); ajStrDel(&tmp2); ajListIterDel(&iter); return ajFalse; } else { ajStrAppendS(&tmp1, tmp2); ajStrAppendC(&tmp1, " "); } } ajStrAppendS(&tmp1, atm->Id3); ajStrAppendC(&tmp1, " "); last_rn=this_rn; } } /* Assign sequence for residues missing from end of linked list */ for(x=last_rn; xChains[chn-1]->Nres; x++) if(!ajResidueToTriplet( ajStrGetCharPos(pdb->Chains[chn-1]->Seq, x), &tmp2)) { ajStrDel(&tmp1); ajStrDel(&tmp2); ajListIterDel(&iter); ajWarn("Index out of range in WriteSeqresChain"); ajFmtPrintF(errf, "//\n%S\nERROR Index out of range " "in WriteSeqresChain\n", pdb->Pdb); return ajFalse; } else { ajStrAppendS(&tmp1, tmp2); ajStrAppendC(&tmp1, " "); } /* Print out SEQRES records */ for(p=ajStrGetPtr(tmp1), len=ajStrGetLen(tmp1), x=0, y=1; xChains[chn-1]->Id, pdb->Chains[chn-1]->Nres, p); /* Tidy up */ ajStrDel(&tmp1); ajStrDel(&tmp2); ajListIterDel(&iter); return ajTrue; } /* @funcstatic WriteSeqresDomain ******************************************** ** ** Writes sequence for a SCOP domain to an output file in pdb format (SEQRES ** records). Sequence is taken from a Pdb structure, domain definition is ** taken from a Scop structure. Where coordinates for multiple models (e.g. ** NMR structures) are given, data for model 1 are written. ** ** @param [w] errf [AjPFile] Output file stream for error messages ** @param [w] outf [AjPFile] Output file stream ** @param [r] pdb [const AjPPdb] Pdb object ** @param [r] scop [const AjPScop] Scop object ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ static AjBool WriteSeqresDomain(AjPFile errf, AjPFile outf, const AjPPdb pdb, const AjPScop scop) { ajint last_rn = 0; ajint this_rn; ajint x; ajint y; ajint z; ajint rcnt = 0; ajint len; ajint chn = -1; const char *p; char id; AjPStr tmp1 = NULL; AjPStr tmp2 = NULL; AjBool found_start = ajFalse; AjBool found_end = ajFalse; AjBool nostart = ajFalse; AjBool noend = ajFalse; AjIList iter = NULL; AjPAtom atm = NULL; AjPStr tmpstr = NULL; /* AjPResidue *resarr = NULL; */ /* Allocate strings etc */ tmp1 = ajStrNew(); tmp2 = ajStrNew(); tmpstr = ajStrNew(); /* Loop for each chain in the domain */ for(z=0;zN;z++,found_start=ajFalse, found_end=ajFalse, last_rn=0) { /* Check for error in chain id */ if(!ajPdbChnidToNum(scop->Chain[z], pdb, &chn)) { ajListIterDel(&iter); ajStrDel(&tmp1); ajStrDel(&tmp2); ajStrDel(&tmpstr); ajWarn("Chain incompatibility error in " "WriteSeqresDomain"); ajFmtPrintF(errf, "//\n%S\nERROR Chain incompatibility " "error in WriteSeqresDomain\n", scop->Entry); return ajFalse; } /* if(resarr) AJFREE(resarr); ajListToarray(pdb->Chains[chn-1]->Residues, (void ***) &resarr); */ /* Intitialise iterator for list of atoms */ iter=ajListIterNewread(pdb->Chains[chn-1]->Atoms); /* Start of chain not specified */ if(!ajStrCmpC(scop->Start[z], ".")) nostart = ajTrue; else nostart = ajFalse; /* End of chain not specified */ if(!ajStrCmpC(scop->End[z], ".")) noend = ajTrue; else noend = ajFalse; /* Iterate through list of atoms */ while((atm=(AjPAtom)ajListIterGet(iter))) { /* Hard-coded to work on model 1 */ /* ** Continue / break if a non-protein atom is found or ** model no. !=1 */ if(atm->Mod!=1) break; if(atm->Type!='P') continue; /* If we are onto a new residue */ this_rn = atm->Idx; if(this_rn!=last_rn) { /* ** The start position was specified, but has not ** been found yet */ if(!found_start && !nostart) { ajStrAssignS(&tmpstr, scop->Start[z]); ajStrAppendK(&tmpstr, '*'); /* Start position found */ /* if(!ajStrCmpCaseS(atm->Pdb, scop->Start[z])) */ if(ajStrMatchWildS(atm->Pdb, tmpstr)) /* if(ajStrMatchWildS(resarr[atm->Idx-1]->Pdb, tmpstr)) */ { if(!ajStrMatchS(atm->Pdb, scop->Start[z])) /* if(!ajStrMatchS(resarr[atm->Idx-1]->Pdb, scop->Start[z])) */ { ajWarn("Domain start found by wildcard match " "only in WriteSeqresDomain"); ajFmtPrintF(errf, "//\n%S\nERROR Domain start " "found " "by wildcard match only in " "WriteSeqresDomain\n", scop->Entry); } last_rn = this_rn; found_start = ajTrue; } else { last_rn = this_rn; continue; } } /* ** Assign sequence for residues missing from the linked list ** of atoms of known structure */ for(x=last_rn; xChains[chn-1]->Seq, x), &tmp2)) { ajListIterDel(&iter); ajStrDel(&tmp1); ajStrDel(&tmp2); ajStrDel(&tmpstr); ajWarn("Index out of range in " "WriteSeqresDomain"); ajFmtPrintF(errf, "//\n%S\nERROR Index out of range " "in WriteSeqresDomain\n", scop->Entry); return ajFalse; } else { ajStrAppendS(&tmp1, tmp2); ajStrAppendC(&tmp1, " "); rcnt++; } } last_rn = this_rn; /* Append the residue to the sequence */ ajStrAppendS(&tmp1, atm->Id3); ajStrAppendC(&tmp1, " "); rcnt++; /* The end position was specified, but has not been found yet */ if(!found_end && !noend) { ajStrAssignS(&tmpstr, scop->End[z]); ajStrAppendK(&tmpstr, '*'); /* End found */ /* if(!ajStrCmpCaseS(atm->Pdb, scop->End[z])) */ if(ajStrMatchWildS(atm->Pdb, tmpstr)) /* if(ajStrMatchWildS(resarr[atm->Idx-1]->Pdb, tmpstr)) */ { if(!ajStrMatchS(atm->Pdb, scop->End[z])) /* if(!ajStrMatchS(resarr[atm->Idx-1]->Pdb, scop->End[z])) */ { ajWarn("Domain end found by wildcard match only " "in WriteSeqresDomain"); ajFmtPrintF(errf, "//\n%S\nERROR Domain end found " "by wildcard match only in " "WriteSeqresDomain\n", scop->Entry); } found_end = ajTrue; break; } } } } /* Domain start specified but not found */ if(!found_start && !nostart) { ajListIterDel(&iter); ajStrDel(&tmp1); ajStrDel(&tmp2); ajStrDel(&tmpstr); ajWarn("Domain start not found in WriteSeqresDomain"); ajFmtPrintF(errf, "//\n%S\nERROR Domain start not found " "in WriteSeqresDomain\n", scop->Entry); return ajFalse; } /* Domain end specified but not found */ if(!found_end && !noend) { ajListIterDel(&iter); ajStrDel(&tmp1); ajStrDel(&tmp2); ajStrDel(&tmpstr); ajWarn("Domain end not found in WriteSeqresDomain"); ajFmtPrintF(errf, "//\n%S\nERROR Domain end not found " "in WriteSeqresDomain\n", scop->Entry); return ajFalse; } /* ** Assign sequence for residues missing from end of linked list ** Only needs to be done where the end of the domain is not specified */ if(noend) { for(x=last_rn; xChains[chn-1]->Nres; x++) if(!ajResidueToTriplet( ajStrGetCharPos(pdb->Chains[chn-1]->Seq, x), &tmp2)) { ajStrDel(&tmp1); ajStrDel(&tmp2); ajStrDel(&tmpstr); ajListIterDel(&iter); ajWarn("Index out of range in WriteSeqresDomain"); ajFmtPrintF(errf, "//\n%S\nERROR Index out of " "range in WriteSeqresDomain\n", scop->Entry); return ajFalse; } else { ajStrAppendS(&tmp1, tmp2); ajStrAppendC(&tmp1, " "); rcnt++; } } ajListIterDel(&iter); } /* ** If the domain was composed of more than once chain then a '.' is ** given as the chain identifier */ if(scop->N > 1) id = '.'; else id = pdb->Chains[chn-1]->Id; /* Print out SEQRES records */ for(p=ajStrGetPtr(tmp1), len=ajStrGetLen(tmp1), x=0, y=1; xChains[chn-1]->Residues, (void ***) &resarr); */ doneter = ajFalse; iter = ajListIterNewread(pdb->Chains[chn-1]->Atoms); while((atm=(AjPAtom)ajListIterGet(iter))) if(atm->Mod==mod) break; for(acnt=1; atm; atm=(AjPAtom)ajListIterGet(iter)) { /* Break if ont a new model */ if(atm->Mod!=mod) break; /* End of protein atoms - so write a TER record */ if(atm->Type!='P' && (!doneter)) { if(mode == ajIDX) { ajFmtPrintF(outf, "%-6s%5d %-4S%c%4d%54s\n", "TER", acnt++, atm2->Id3, pdb->Chains[chn-1]->Id, atm2->Idx, " "); } else if(mode == ajPDB) { ajFmtPrintF(outf, "%-6s%5d %-4S%c%4S%54s\n", "TER", acnt++, atm2->Id3, pdb->Chains[chn-1]->Id, atm2->Pdb, /* resarr[atm2->Idx-1]->Pdb, */ " "); } else ajFatal("Invalid mode in WriteAtomChain"); doneter = ajTrue; } /* Write out ATOM or HETATM line */ if(atm->Type=='P') ajFmtPrintF(outf, "%-6s", "ATOM"); else ajFmtPrintF(outf, "%-6s", "HETATM"); if(mode == ajIDX) { ajFmtPrintF(outf, "%5d %-4S%-4S%c%4d%12.3f%8.3f%8.3f" "%6.2f%6.2f%11s%-3c\n", acnt++, atm->Atm, atm->Id3, pdb->Chains[chn-1]->Id, atm->Idx, atm->X, atm->Y, atm->Z, atm->O, atm->B, " ", *ajStrGetPtr(atm->Atm)); } else if(mode == ajPDB) { ajFmtPrintF(outf, "%5d %-4S%-4S%c%4S%12.3f%8.3f%8.3f" "%6.2f%6.2f%11s%-3c\n", acnt++, atm->Atm, atm->Id3, pdb->Chains[chn-1]->Id, atm->Pdb, /* resarr[atm->Idx-1]->Pdb, */ atm->X, atm->Y, atm->Z, atm->O, atm->B, " ", *ajStrGetPtr(atm->Atm)); } else ajFatal("Invalid mode in WriteAtomChain"); atm2 = atm; } /* Write TER record if its not already done */ if(!doneter) { ajFmtPrintF(outf, "%-6s%5d %-4S%c%4d%54s\n", "TER", acnt++, atm2->Id3, pdb->Chains[chn-1]->Id, atm2->Idx, " "); doneter=ajTrue; } ajListIterDel(&iter); /* if(resarr) AJFREE(resarr); */ return ajTrue; } /* @funcstatic WriteAtomDomain ************************************************ ** ** Writes coordinates for a SCOP domain to an output file in pdb format (ATOM ** records). Coordinates are taken from a Pdb structure, domain definition is ** taken from a Scop structure. The model number argument should have a value ** of 1 for x-ray structures. Coordinates for heterogens are NOT written to ** file. The corrected residue numbers are given (these give an index into ** the SEQRES sequence. ** ** @param [w] errf [AjPFile] Output file stream for error messages ** @param [w] outf [AjPFile] Output file stream ** @param [r] pdb [const AjPPdb] Pdb object ** @param [r] scop [const AjPScop] Scop object ** @param [r] mod [ajint] Model number, beginning at 1 ** @param [r] mode [ajint] Either ajPDB or ajIDX if the original or corrected ** residue number is to be used. ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ static AjBool WriteAtomDomain(AjPFile errf, AjPFile outf, const AjPPdb pdb, const AjPScop scop, ajint mod, ajint mode) { /* ** rn_mod is a modifier to the residue number to give correct residue ** numbering for the domain */ ajint acnt = 1; ajint rn_mod = 0; ajint z; ajint finalrn = 0; ajint chn; char id = '\0'; AjBool found_start = ajFalse; AjBool found_end = ajFalse; AjBool nostart = ajFalse; AjBool noend = ajFalse; AjIList iter = NULL; AjPAtom atm = NULL; AjPAtom atm2 = NULL; AjPStr tmpstr = NULL; /* AjPResidue *resarr = NULL; */ if(!errf || !outf || !pdb || !scop) ajFatal("Bad args. passed to WriteAtomDomain"); /* Allocate strings etc */ tmpstr = ajStrNew(); /* Loop for each chain in the domain */ for(z=0;zN;z++,found_start=ajFalse, found_end=ajFalse) { /* Check for chain error */ if(!ajPdbChnidToNum(scop->Chain[z], pdb, &chn)) { ajListIterDel(&iter); ajWarn("Chain incompatibility error in " "WriteAtomDomain"); ajFmtPrintF(errf, "//\n%S\nERROR Chain incompatibility " "error in WriteAtomDomain\n", scop->Entry); ajStrDel(&tmpstr); return ajFalse; } /* ajListToarray(pdb->Chains[chn-1]->Residues, (void ***) &resarr); */ /* Iteratre up to the correct model */ iter=ajListIterNewread(pdb->Chains[chn-1]->Atoms); while((atm = (AjPAtom)ajListIterGet(iter))) { if(atm->Mod==mod) break; } if(!atm) ajFatal("Unexpected error (atm == NULL) in WriteAtomDomain\n"); /* Increment res. counter from last chain if appropriate */ if(noend) rn_mod += atm2->Idx; else rn_mod += finalrn; /* Start of chain was not specified */ if(!ajStrCmpC(scop->Start[z], ".")) nostart = ajTrue; else nostart = ajFalse; /* End of chain was not specified */ if(!ajStrCmpC(scop->End[z], ".")) noend = ajTrue; else noend = ajFalse; /* If the domain was composed of more than once chain then a '.' is given as the chain identifier */ if(scop->N > 1) id = '.'; else id = pdb->Chains[chn-1]->Id; for(; atm; atm=(AjPAtom)ajListIterGet(iter)) { /* ** Continue / break if a non-protein atom is found or ** model no. is incorrect */ if(atm->Mod!=mod) break; if(atm->Type!='P') continue; /* ** The start position was specified, but has not ** been found yet */ if(!found_start && !nostart) { ajStrAssignS(&tmpstr, scop->Start[z]); ajStrAppendK(&tmpstr, '*'); /* Start position found */ /* if(!ajStrCmpCaseS(atm->Pdb, scop->Start[z])) */ if(ajStrMatchWildS(atm->Pdb, tmpstr)) /* if(ajStrMatchWildS(resarr[atm->Idx-1]->Pdb, tmpstr)) */ { if(!ajStrMatchS(atm->Pdb, scop->Start[z])) /* if(!ajStrMatchS(resarr[atm->Idx-1]->Pdb, scop->Start[z])) */ { ajWarn("Domain start found by wildcard match only " "in WriteAtomDomain"); ajFmtPrintF(errf, "//\n%S\nERROR Domain start found " "by wildcard match only in " "WriteAtomDomain\n", scop->Entry); } rn_mod -= atm->Idx-1; found_start = ajTrue; } else continue; } /* The end position was specified, but has not been found yet */ if(!found_end && !noend) { ajStrAssignS(&tmpstr, scop->End[z]); ajStrAppendK(&tmpstr, '*'); /* End position found */ /* if(!ajStrCmpCaseS(atm->Pdb, scop->End[z])) */ if(ajStrMatchWildS(atm->Pdb, tmpstr)) /* if(ajStrMatchWildS(resarr[atm->Idx-1]->Pdb, tmpstr)) */ { if(!ajStrMatchS(atm->Pdb, scop->End[z])) /* if(!ajStrMatchS(resarr[atm->Idx-1]->Pdb, scop->End[z])) */ { ajWarn("Domain end found by wildcard match only " "in WriteAtomDomain"); ajFmtPrintF(errf, "//\n%S\nERROR Domain end found " "by wildcard match only in " "WriteAtomDomain\n", scop->Entry); } found_end = ajTrue; finalrn = atm->Idx; } } else if(atm->Idx != finalrn && !noend) break; /* Write out ATOM line to pdb file */ if(mode == ajIDX) ajFmtPrintF(outf, "%-6s%5d %-4S%-4S%c%4d%12.3f%8.3f" "%8.3f%6.2f%6.2f%11s%-3c\n", "ATOM", acnt++, atm->Atm, atm->Id3, id, atm->Idx+rn_mod, atm->X, atm->Y, atm->Z, atm->O, atm->B, " ", *ajStrGetPtr(atm->Atm)); else ajFmtPrintF(outf, "%-6s%5d %-4S%-4S%c%4S%12.3f%8.3f" "%8.3f%6.2f%6.2f%11s%-3c\n", "ATOM", acnt++, atm->Atm, atm->Id3, id, atm->Pdb, /* resarr[atm->Idx-1]->Pdb, */ atm->X, atm->Y, atm->Z, atm->O, atm->B, " ", *ajStrGetPtr(atm->Atm)); /* Assign pointer for this chain */ atm2=atm; } /* Diagnostic if start was specified but not found */ if(!found_start && !nostart) { ajListIterDel(&iter); ajWarn("Domain start not found in WriteAtomDomain"); ajFmtPrintF(errf, "//\n%S\nERROR Domain start not " "found in WriteAtomDomain\n", scop->Entry); ajStrDel(&tmpstr); return ajFalse; } /* Diagnostic if end was specified but not found */ if(!found_end && !noend) { ajListIterDel(&iter); ajWarn("Domain end not found in WriteAtomDomain"); ajFmtPrintF(errf, "//\n%S\nERROR Domain end not " "found in WriteAtomDomain\n", scop->Entry); ajStrDel(&tmpstr); return ajFalse; } ajListIterDel(&iter); } if(!atm2) ajFatal("Unexpected error (atm2 == NULL) in WriteAtomDomain\n"); /* Write the TER record to the pdb file */ ajFmtPrintF(outf, "%-6s%5d %-4S%c%4d%54s\n", "TER", acnt++, atm2->Id3, id, atm2->Idx+rn_mod, " "); ajStrDel(&tmpstr); return ajTrue; } /* @funcstatic WriteAtomDomainPdb ******************************************* ** ** Writes coordinates for a SCOP domain to an output file in pdb format (ATOM ** records). Coordinates are taken from a Pdb structure, domain definition ** is taken from a Scop structure. ** Coordinates for heterogens are NOT written to ** file. The original (pdb) residue numbers are given (these do NOT give an ** index into the SEQRES sequence. Use WriteAtomDomainIdx if you ** need an index into the SEQRES sequence. ** ** @param [w] errf [AjPFile] Output file stream for error messages ** @param [w] outf [AjPFile] Output file stream ** @param [r] pdb [const AjPPdb] Pdb object ** @param [r] scop [const AjPScop] Scop object ** @param [r] mod [ajint] Model number, beginning at 1 ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ static AjBool WriteAtomDomainPdb(AjPFile errf, AjPFile outf, const AjPPdb pdb, const AjPScop scop, ajint mod) { if(WriteAtomDomain(errf, outf, pdb, scop, mod, ajPDB)) return ajTrue; return ajFalse; } /* @funcstatic WriteAtomDomainIdx ******************************************* ** ** Writes coordinates for a SCOP domain to an output file in pdb format (ATOM ** records). Coordinates are taken from a Pdb structure, domain definition ** is taken from a Scop structure. The model number argument should have a ** value of 1 for x-ray structures. Coordinates for heterogens are NOT ** written to file. The corrected residue numbers are given (these give an ** index into the SEQRES sequence). Use WriteAtomDomainPdb if you ** wish to maintain the original residue number. ** ** @param [w] errf [AjPFile] Output file stream for error messages ** @param [w] outf [AjPFile] Output file stream ** @param [r] pdb [const AjPPdb] Pdb object ** @param [r] scop [const AjPScop] Scop object ** @param [r] mod [ajint] Model number, beginning at 1 ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ static AjBool WriteAtomDomainIdx(AjPFile errf, AjPFile outf,const AjPPdb pdb, const AjPScop scop, ajint mod) { if(WriteAtomDomain(errf, outf, pdb, scop, mod, ajIDX)) return ajTrue; return ajFalse; } /* @funcstatic WriteHeterogen *********************************************** ** ** Writes coordinates for heterogens that could not be uniquely associated ** with a chain to an output file in pdb format (HETATM records). Coordinates ** are taken from a Pdb structure. The model number argument should have a ** value of 1 for x-ray structures. ** ** @param [w] outf [AjPFile] Output file stream ** @param [r] pdb [const AjPPdb] Pdb object ** @param [r] mod [ajint] Model number, beginning at 1 ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ static AjBool WriteHeterogen(AjPFile outf, const AjPPdb pdb, ajint mod) { AjIList iter = NULL; AjPAtom atm = NULL; ajint acnt; /* Check args are not NULL */ if(!outf || !pdb || mod<1) return ajFalse; iter=ajListIterNewread(pdb->Groups); while((atm = (AjPAtom)ajListIterGet(iter))) if(atm->Mod==mod) break; for(acnt=1; atm; atm=(AjPAtom)ajListIterGet(iter)) { /* Break if on t0 a new model */ if(atm->Mod!=mod) break; /* Write out HETATM line */ if(atm->Type == 'H') ajFmtPrintF(outf, "%-6s%5d %-4S%-4S%c%4d%12.3f%8.3f%8.3f" "%6.2f%6.2f%11s%-3c\n", "HETATM", acnt++, atm->Atm, atm->Id3, ajChararrGet(pdb->gpid, atm->Gpn-1), atm->Idx, atm->X, atm->Y, atm->Z, atm->O, atm->B, " ", *ajStrGetPtr(atm->Atm)); else ajFmtPrintF(outf, "%-6s%5d %-4S%-4S%c%4d%12.3f%8.3f%8.3f" "%6.2f%6.2f%11s%-3c\n", "HETATM", acnt++, atm->Atm, atm->Id3, ' ', atm->Idx, atm->X, atm->Y, atm->Z, atm->O, atm->B, " ", *ajStrGetPtr(atm->Atm)); /* atm2 = atm; Unused variable */ } ajListIterDel(&iter); return ajTrue; } /* @funcstatic WriteText **************************************************** ** ** Writes text to file in the format of pdb records ** ** @param [w] outf [AjPFile] Output file stream ** @param [r] str [const AjPStr] Text to print out ** @param [r] prefix [const char *] pdb record (e.g. "HEADER") ** ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ static AjBool WriteText(AjPFile outf, const AjPStr str, const char *prefix) { ajint n = 0; ajint l = 0; ajint c = 0; AjPStrTok handle = NULL; AjPStr token = NULL; AjPStr tmp = NULL; if(!outf) return ajFalse; /* Initialise strings */ token = ajStrNew(); tmp = ajStrNewC(""); handle = ajStrTokenNewC(str," \t\r\n"); while(ajStrTokenNextParse(&handle,&token)) { if(!c) ajFmtPrintF(outf,"%-11s",prefix); if((l=n+ajStrGetLen(token)) < 68) { if(c++) ajStrAppendC(&tmp," "); ajStrAppendS(&tmp,token); n = ++l; } else { ajFmtPrintF(outf,"%-*S\n",69, tmp); ajStrAssignS(&tmp,token); ajStrAppendC(&tmp," "); n = ajStrGetLen(token); c = 0; } } if(c) ajFmtPrintF(outf,"%-*S\n",69, tmp); ajStrTokenDel(&handle); ajStrDel(&token); ajStrDel(&tmp); return ajTrue; } /* @funcstatic WriteHeader ************************************************** ** ** Writes the Pdb element of a Pdb structure to an output file in pdb format ** ** @param [w] outf [AjPFile] Output file stream ** @param [r] pdb [const AjPPdb] Pdb object ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ static AjBool WriteHeader(AjPFile outf, const AjPPdb pdb) { if(pdb && outf) { ajFmtPrintF(outf, "%-11sCLEANED-UP PDB FILE FOR %-45S\n", "HEADER", pdb->Pdb); return ajTrue; } return ajFalse; } /* @funcstatic WriteHeaderScop ********************************************** ** ** Writes the Entry element of a Scop structure to an output file in pdb ** format ** ** @param [w] outf [AjPFile] Output file stream ** @param [r] scop [const AjPScop] Scop object ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ static AjBool WriteHeaderScop(AjPFile outf, const AjPScop scop) { if(scop && outf) { ajFmtPrintF(outf, "%-11sCLEANED-UP PDB FILE FOR SCOP DOMAIN %-33S\n", "HEADER", scop->Entry); return ajTrue; } return ajFalse; } /* @funcstatic WriteTitle *************************************************** ** ** Writes a TITLE record to an output file in pdb format ** The text is hard-coded. ** ** @param [w] outf [AjPFile] Output file stream ** @param [r] pdb [const AjPPdb] Pdb object ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ static AjBool WriteTitle(AjPFile outf, const AjPPdb pdb) { if(pdb && outf) { ajFmtPrintF(outf, "%-11sTHIS FILE IS MISSING MOST RECORDS FROM THE " "ORIGINAL PDB FILE%9s\n", "TITLE", " "); return ajTrue; } return ajFalse; } /* @funcstatic WriteCompnd ************************************************** ** ** Writes the Compnd element of a Pdb structure to an output file in pdb ** format ** ** @param [w] outf [AjPFile] Output file stream ** @param [r] pdb [const AjPPdb] Pdb object ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ static AjBool WriteCompnd(AjPFile outf, const AjPPdb pdb) { if(pdb && outf) { WriteText(outf,pdb->Compnd,"COMPND"); return ajTrue; } return ajFalse; } /* @funcstatic WriteSource ************************************************** ** ** Writes the Source element of a Pdb structure to an output file in pdb ** format ** ** @param [w] outf [AjPFile] Output file stream ** @param [r] pdb [const AjPPdb] Pdb object ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ static AjBool WriteSource(AjPFile outf, const AjPPdb pdb) { if(pdb && outf) { WriteText(outf,pdb->Source,"SOURCE"); return ajTrue; } return ajFalse; } /* @funcstatic WriteEmptyRemark ********************************************* ** ** Writes an empty REMARK record to an output file in pdb format ** ** @param [w] outf [AjPFile] Output file stream ** @param [r] pdb [const AjPPdb] Pdb object ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ static AjBool WriteEmptyRemark(AjPFile outf, const AjPPdb pdb) { if(pdb && outf) { ajFmtPrintF(outf, "%-11s%-69s\n", "REMARK", " "); return ajTrue; } return ajFalse; } /* @funcstatic WriteResolution ********************************************** ** ** Writes the Reso element of a Pdb structure to an output file in pdb ** format ** ** @param [w] outf [AjPFile] Output file stream ** @param [r] pdb [const AjPPdb] Pdb object ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ static AjBool WriteResolution(AjPFile outf, const AjPPdb pdb) { if(pdb && outf) { ajFmtPrintF(outf, "%-11sRESOLUTION. %-6.2f%-51s\n", "REMARK", pdb->Reso, "ANGSTROMS."); return ajTrue; } return ajFalse; } /* @funcstatic ReadLines ****************************************** ** ** Reads a pdb file and returns a pointer to a partially filled Pdbfile object. ** All of the lines from the pdb file are written to the array of the ** object and the element is written. ** ** Memory for the object itself and any arrays whose size is equal to the ** number of lines is allocated: ** lines, linetype, chnn, gpn, modn, resni, resn1, resn2, pdbn, oddnum, atype, ** rtype, x,y,z,o,b, elementNum, elementId, elementType & helixClass. ** ** The following elements are written: ** nlines, lines. ** ** The linetype array is set to default value of PDBPARSE_IGNORE ** ** @param [u] inf [AjPFile] Pointer to pdb file ** ** @return [AjPPdbfile] Pdbfile object pointer, or NULL on failure. ** @@ ****************************************************************************/ static AjPPdbfile ReadLines(AjPFile inf) { AjPPdbfile ptr =NULL; /* pdbfile object to be returned */ AjPList list =NULL; /* List of lines in pdb file */ AjPStr line =NULL; /* A line from a pdb file */ ajint i =0; /* Check args */ if(!inf) return NULL; /* Allocate list and pdbfile object*/ list = ajListstrNew(); /* Don't know number of lines or chains yet */ ptr = PdbfileNew(0,0); /* Read pdb file and append lines to list */ line = ajStrNew(); while(ajReadlineTrim(inf,&line)) { ajListstrPushAppend(list, line); line = ajStrNew(); } /* Convert list to array in pdbfile object */ ptr->nlines = ajListstrToarray(list, &(ptr->lines)); if(ptr->nlines==0) { ajStrDel(&line); ajListstrFree(&list); PdbfileDel(&ptr); return NULL; } /* Allocate memory for x,y,z,o,b, modn, chnn, linetype, ok, coord and pdbn arrays in pdbfile object */ AJCNEW0(ptr->x, ptr->nlines); AJCNEW0(ptr->y, ptr->nlines); AJCNEW0(ptr->z, ptr->nlines); AJCNEW0(ptr->o, ptr->nlines); AJCNEW0(ptr->b, ptr->nlines); AJCNEW0(ptr->elementNum, ptr->nlines); AJCNEW0(ptr->elementType, ptr->nlines); AJCNEW0(ptr->helixClass, ptr->nlines); AJCNEW0(ptr->resni, ptr->nlines); AJCNEW0(ptr->resn1, ptr->nlines); AJCNEW0(ptr->resn2, ptr->nlines); AJCNEW0(ptr->modn, ptr->nlines); AJCNEW0(ptr->chnn, ptr->nlines); AJCNEW0(ptr->gpn, ptr->nlines); AJCNEW0(ptr->linetype, ptr->nlines); for(i=0;inlines;i++) ptr->linetype[i]=PDBPARSE_IGNORE; AJCNEW0(ptr->pdbn, ptr->nlines); for(i=0;inlines;i++) ptr->pdbn[i]=ajStrNew(); AJCNEW0(ptr->elementId, ptr->nlines); for(i=0;inlines;i++) ptr->elementId[i]=ajStrNew(); AJCNEW0(ptr->atype, ptr->nlines); for(i=0;inlines;i++) ptr->atype[i]=ajStrNewRes(4); AJCNEW0(ptr->rtype, ptr->nlines); for(i=0;inlines;i++) ptr->rtype[i]=ajStrNewRes(4); AJCNEW0(ptr->oddnum, ptr->nlines); for(i=0;inlines;i++) ptr->oddnum[i]=ajFalse; /*DIAGNOSTIC // for(i=0;inlines;i++) // { // ajFmtPrintF(tempfile, "%S\n", ptr->lines[i]); // fflush(tempfile->fp); // } */ /* Tidy up and return */ ajStrDel(&line); ajListstrFree(&list); return ptr; } /* @funcstatic PdbfileDel ************************************************** ** ** Destructor for pdbfile object. ** ** @param [d] pthis [AjPPdbfile*] Pdbfile object pointer ** ** @return [void] ** @@ ****************************************************************************/ static void PdbfileDel(AjPPdbfile *pthis) { ajint i; /* Check args */ if(!(*pthis)) { ajWarn("NULL arg passed to PdbfileDel.\n"); return; } if((*pthis)->resn1ok) AJFREE((*pthis)->resn1ok); if((*pthis)->nres) AJFREE((*pthis)->nres); if((*pthis)->nligands) AJFREE((*pthis)->nligands); if((*pthis)->chainok) AJFREE((*pthis)->chainok); if((*pthis)->pdbid) ajStrDel(&(*pthis)->pdbid); if((*pthis)->compnd) ajStrDel(&(*pthis)->compnd); if((*pthis)->source) ajStrDel(&(*pthis)->source); if((*pthis)->modn) AJFREE((*pthis)->modn); if((*pthis)->x) AJFREE((*pthis)->x); if((*pthis)->y) AJFREE((*pthis)->y); if((*pthis)->z) AJFREE((*pthis)->z); if((*pthis)->o) AJFREE((*pthis)->o); if((*pthis)->b) AJFREE((*pthis)->b); if((*pthis)->numHelices) AJFREE((*pthis)->numHelices); if((*pthis)->numStrands) AJFREE((*pthis)->numStrands); if((*pthis)->numSheets) AJFREE((*pthis)->numSheets); if((*pthis)->numTurns) AJFREE((*pthis)->numTurns); if((*pthis)->elementNum) AJFREE((*pthis)->elementNum); if((*pthis)->elementType) AJFREE((*pthis)->elementType); if((*pthis)->helixClass) AJFREE((*pthis)->helixClass); if((*pthis)->resni) AJFREE((*pthis)->resni); if((*pthis)->resn1) AJFREE((*pthis)->resn1); if((*pthis)->resn2) AJFREE((*pthis)->resn2); if((*pthis)->chnn) AJFREE((*pthis)->chnn); if((*pthis)->gpn) AJFREE((*pthis)->gpn); if((*pthis)->linetype) AJFREE((*pthis)->linetype); if((*pthis)->oddnum) AJFREE((*pthis)->oddnum); if((*pthis)->lines) { for(i=0;i<(*pthis)->nlines; i++) ajStrDel(&(*pthis)->lines[i]); AJFREE((*pthis)->lines); } if((*pthis)->pdbn) { for(i=0;i<(*pthis)->nlines; i++) ajStrDel(&(*pthis)->pdbn[i]); AJFREE((*pthis)->pdbn); } if((*pthis)->elementId) { for(i=0;i<(*pthis)->nlines; i++) ajStrDel(&(*pthis)->elementId[i]); AJFREE((*pthis)->elementId); } if((*pthis)->atype) { for(i=0;i<(*pthis)->nlines; i++) ajStrDel(&(*pthis)->atype[i]); AJFREE((*pthis)->atype); } if((*pthis)->rtype) { for(i=0;i<(*pthis)->nlines; i++) ajStrDel(&(*pthis)->rtype[i]); AJFREE((*pthis)->rtype); } if((*pthis)->seqres) { for(i=0;i<(*pthis)->nchains; i++) ajStrDel(&(*pthis)->seqres[i]); AJFREE((*pthis)->seqres); } if((*pthis)->seqresful) { for(i=0;i<(*pthis)->nchains; i++) ajStrDel(&(*pthis)->seqresful[i]); AJFREE((*pthis)->seqresful); } if((*pthis)->chid) ajChararrDel(&(*pthis)->chid); if((*pthis)->gpid) ajChararrDel(&(*pthis)->gpid); AJFREE(*pthis); *pthis=NULL; return; } /* @funcstatic ElementsNew ************************************************* ** ** Constructor for Elements object. ** ** @param [r] nelms [ajint] no. of elements ** ** @return [AjPElements] Pointer to Elements object, or NULL on failure. ** @@ ****************************************************************************/ static AjPElements ElementsNew(ajint nelms) { AjPElements ret=NULL; ajint x =0; AJNEW0(ret); ret->n=nelms; if(nelms) { AJCNEW0(ret->elms, nelms); for(x=0;xelms[x] = ElementNew(); } /* else ajWarn("Value of zero passed to ElementsNew"); */ return ret; } /* @funcstatic ElementsDel ************************************************* ** ** Destructor for Elements object. ** ** @param [d] ptr [AjPElements*] Elements object pointer ** ** @return [void] ** @@ ****************************************************************************/ static void ElementsDel(AjPElements *ptr) { ajint x=0; if(!(*ptr)) { ajWarn("NULL arg passed to ElementsDel"); return; } if((*ptr)->elms) { for(x=0; x<(*ptr)->n; x++) ElementDel(&(*ptr)->elms[x]); AJFREE((*ptr)->elms); } AJFREE((*ptr)); *ptr=NULL; return; } /* @funcstatic ElementNew ************************************************** ** ** Constructor for Element object. ** ** @return [AjPElement] Pointer to Element object, or NULL on failure. ** @@ ****************************************************************************/ static AjPElement ElementNew(void) { AjPElement ret; AJNEW0(ret); ret->elementId = ajStrNew(); ret->initResName = ajStrNew(); ret->initSeqNum = ajStrNew(); ret->endResName = ajStrNew(); ret->endSeqNum = ajStrNew(); ret->elementNum = 0; ret->elementType = ' '; ret->helixClass = 0; ret->chainId = ' '; return ret; } /* @funcstatic ElementDel ************************************************** ** ** Destructor for Element object. ** ** @param [d] ptr [AjPElement*] Element object pointer ** ** @return [void] ** @@ ****************************************************************************/ static void ElementDel(AjPElement *ptr) { ajStrDel(&((*ptr)->elementId)); ajStrDel(&((*ptr)->initResName)); ajStrDel(&((*ptr)->initSeqNum)); ajStrDel(&((*ptr)->endResName)); ajStrDel(&((*ptr)->endSeqNum)); AJFREE((*ptr)); *ptr=NULL; return; } /* @funcstatic PdbfileNew ************************************************** ** ** Constructor for Pdbfile object. ** ** @param [r] nlines [ajint] No. of lines in pdb file ** @param [r] nchains [ajint] No. of chains in pdb file ** ** @return [AjPPdbfile] Pointer to pdbfile object, or NULL on failure. ** @@ ****************************************************************************/ static AjPPdbfile PdbfileNew(ajint nlines, ajint nchains) { AjPPdbfile ret =NULL; ajint i=0; AJNEW0(ret); ret->pdbid = ajStrNew(); ret->compnd = ajStrNew(); ret->source = ajStrNew(); ret->nomod=ajFalse; ret->toofewter=ajFalse; ret->nchains=nchains; if(nchains) { AJCNEW0(ret->resn1ok, nchains); for(i=0;iresn1ok[i]=ajTrue; AJCNEW0(ret->numHelices, nchains); for(i=0;inumHelices[i]=ajTrue; AJCNEW0(ret->numStrands, nchains); for(i=0;inumStrands[i]=ajTrue; AJCNEW0(ret->numSheets, nchains); for(i=0;inumSheets[i]=ajTrue; AJCNEW0(ret->numTurns, nchains); for(i=0;inumTurns[i]=ajTrue; AJCNEW0(ret->chainok, nchains); for(i=0;ichainok[i]=ajTrue; AJCNEW0(ret->nres, nchains); AJCNEW0(ret->nligands, nchains); AJCNEW0(ret->seqres, nchains); for(i=0;iseqres[i]=ajStrNew(); AJCNEW0(ret->seqresful, nchains); for(i=0;iseqresful[i]=ajStrNew(); ret->chid = ajChararrNewL(nchains); } else { ret->chid = ajChararrNew(); } ret->gpid = ajChararrNew(); ret->nlines = nlines; if(nlines) { AJCNEW0(ret->x, nlines); AJCNEW0(ret->y, nlines); AJCNEW0(ret->z, nlines); AJCNEW0(ret->o, nlines); AJCNEW0(ret->b, nlines); AJCNEW0(ret->elementNum, nlines); AJCNEW0(ret->elementType, nlines); AJCNEW0(ret->helixClass, nlines); AJCNEW0(ret->resni, nlines); AJCNEW0(ret->resn1, nlines); AJCNEW0(ret->resn2, nlines); AJCNEW0(ret->modn, nlines); AJCNEW0(ret->chnn, nlines); AJCNEW0(ret->gpn, nlines); AJCNEW0(ret->linetype, nlines); AJCNEW0(ret->oddnum, nlines); AJCNEW0(ret->lines, nlines); AJCNEW0(ret->pdbn, nlines); AJCNEW0(ret->elementId, nlines); AJCNEW0(ret->atype, nlines); AJCNEW0(ret->rtype, nlines); for(i=0;ilinetype[i]=PDBPARSE_IGNORE; ret->oddnum[i]=ajFalse; ret->lines[i]=ajStrNew(); ret->pdbn[i]=ajStrNew(); ret->elementId[i]=ajStrNew(); ret->atype[i]=ajStrNewRes(4); ret->rtype[i]=ajStrNewRes(4); } } /* else ajWarn("Zero sized arg passed to PdbfileNew.\n"); */ return ret; } /* @funcstatic FirstPass ****************************************************** ** ** The initial read of the pdb file as held in the array of a Pdbfile ** object. Bibliographic information is parsed, the number of chains ** determined and the sequences and chain ids from the SEQRES records are ** parsed. The line type (see below) of each line is determined, and for ** lines with coordinates, the residue type is parsed. A count of the number ** of TER records and the location of the first coordinte line is also ** determined. ** Secondary structure information is also parsed and an AjPElements ** object is written. ** ** Memory for any arrays whose size is equal to the number of chains is ** allocated: ** seqres, seqresful, nres, chainok, resn1ok, nligands, numHelices, ** numStrands, numSheets, numTurns ** ** The following arrays are written: ** seqres, seqresful, chid, pdbn, resn1 and resn2, rtype, linetype ** ** The following elements are written: ** nchains, tercnt, modcnt, nomod, source, compnd, method, reso, idxfirst ** ** The chainok & resn1ok arrays are set to default values of ajTrue ** ** ** Writing resn1/resn2 & pdbn arrays ** The pdbn array is the raw residue number (as a string) and is filled for ** lines for which ==PDBPARSE_COORD. The resn1/resn2 arrays are ** given initial values which at this stage are simply the integer component ** of pdbn. The values for resn1/resn2 are changed later in the program. ** ** Writing modcnt and nomod elements ** modcnt is a count of the number of MODEL records (excluding duplicate ** records). However, if no MODEL records are found, modcnt is set to the ** minimum value of 1, and nomod is set to ajTrue. ** ** Writing linetype array ** The linetype array is set as follows: ** ** PDBPARSE_COORD for ATOM or HETATM records which contain both atom and ** residue identifier codes and which are not duplicate positions. ** Duplicate positions for (i) whole residues or (ii) individual atoms ** are presumed where a ATOM or HETATM record uses a value other than '1' or ** 'A' in the (i) residue alternate location indicator (column 17) or (ii) ** the first column of the atom name (column 13) respectively. ** ** PDBPARSE_TER for TER records ** ** PDBPARSE_MODEL for MODEL records ** ** PDBPARSE_COORDWAT for HOH (should be HETATM records) ** ** For all other lines, it is left as the default of PDBPARSE_IGNORE (the value ** might change later in the program). ** ** ** @param [w] pdbfile [AjPPdbfile] Pdbfile object ** @param [u] logf [AjPFile] Log file (build diagnostics) ** @param [w] elms [AjPElements*] Elements object pointer ** @param [r] camask [AjBool] Whether to mask non-amino acid residues ** within protein chains which do not ** have a C-alpha atom. ** ** @return [AjBool] True if file was parsed, False otherwise ** @@ ****************************************************************************/ static AjBool FirstPass(AjPPdbfile pdbfile, AjPFile logf, AjPElements *elms, AjBool camask) { ajint i=0; /* Loop counter */ ajint j=0; /* Loop counter */ ajint k=0; /* Loop counter */ AjBool donefirstatom=ajFalse; /* Flag for finding first ATOM or HETATM line */ char pdbn[6]; /* Residue number */ AjBool resolfound=ajFalse; /* Flag for finding RESOLUTION record */ AjBool seqresfound=ajFalse; /* Flag for finding SEQRES record */ AjPStr tmpstr=NULL; /* A temp. string */ AjPStr seqres=NULL; /* Sequence from SEQRES records */ ajint seqreslen=0; /* Indicated length of sequence from SEQRES records */ ajint lenful=0; /* Length of SEQRES sequence including ACE, FOR & NH2 groups that might be discarded by the call to SeqresToSequence */ char last_id=' '; /* CHain id of last SEQRES line read*/ AjPStr tmpseq=NULL; /* A temp. string for a sequence */ AjPStr tmpseqful=NULL; /* A temp. string for a sequence */ AjPList listseqs=NULL; /* For list of sequences from SEQRES records*/ AjPList listseqsful=NULL; /* For list of sequences (using 3-letter codes) from SEQRES records*/ AjBool done_msg=ajFalse; /* Flag for error messaging */ AjPList listelms=NULL; /* Temp. list of secondary structure elements (from HELIX, SHEET and TURN records) */ AjPElement elm=NULL; /* Temp. Element object pointer */ AjPElement FirstStrand=NULL; /* Temp. pointer to first strand of each sheet*/ AjBool doneFirstStrand=ajFalse;/* Flag for parsing first strand of each sheet */ AjPStr LastSheetId=NULL; /* Sheet identifier of the last sheet read in */ /* Check args */ if(!pdbfile || !logf || !(*elms)) { ajWarn("Bad args passed to FirstPass\n"); return ajFalse; } /* Allocate memory etc*/ tmpstr=ajStrNew(); seqres=ajStrNew(); LastSheetId=ajStrNew(); listseqs=ajListstrNew(); listseqsful=ajListstrNew(); listelms=ajListNew(); /* Start of main loop */ for(i=0;inlines;i++) { if((ajStrPrefixC(pdbfile->lines[i],"ATOM")) || (ajStrPrefixC(pdbfile->lines[i],"HETATM"))) { /*In instances where >1 residue positions are given, ignore all but position 'A' & '1' In instances where >1 atom positions are given, ignore all but position '1'*/ if(((ajStrGetCharPos(pdbfile->lines[i], 16) != ' ') && ((ajStrGetCharPos(pdbfile->lines[i], 16) != 'A') && (ajStrGetCharPos(pdbfile->lines[i], 16) != '1'))) || ((ajStrGetCharPos(pdbfile->lines[i], 12) != ' ') && ((ajStrGetCharPos(pdbfile->lines[i], 12) != '1') && (isdigit((int)ajStrGetCharPos(pdbfile->lines[i], 12)))))) { if(!done_msg) { ajFmtPrintF(logf, "%-15s%d\n", "DUPATOMRES", i+1); done_msg=ajTrue; } } /*In instances where no atom or residue identity is given, ignore line */ else if((ajStrGetCharPos(pdbfile->lines[i], 12)==' ' && ajStrGetCharPos(pdbfile->lines[i], 13)==' ' && ajStrGetCharPos(pdbfile->lines[i], 14)==' ') || (ajStrGetCharPos(pdbfile->lines[i], 17)==' ' && ajStrGetCharPos(pdbfile->lines[i], 19)==' ')) { ajFmtPrintF(logf, "%-15s%d\n", "NOATOMRESID", i+1); } else { pdbfile->linetype[i]=PDBPARSE_COORD; if(!donefirstatom) { donefirstatom=ajTrue; pdbfile->idxfirst=i; } /*Write residue number for the line */ for(k=22, j=0; k<=26; k++) if((isalnum((int)ajStrGetCharPos(pdbfile->lines[i], k))) || ajStrGetCharPos(pdbfile->lines[i], k)=='-') pdbn[j++] = ajStrGetCharPos(pdbfile->lines[i], k); pdbn[j]='\0'; ajStrAssignC(&(pdbfile->pdbn[i]), pdbn); if(!ajFmtScanS(pdbfile->pdbn[i], "%d", &(pdbfile->resn1[i]))) { ajFmtPrintF(logf, "%-15s%d\n", "ATOMNONUM", i+1); pdbfile->linetype[i]=PDBPARSE_IGNORE; } else { pdbfile->resn2[i]=pdbfile->resn1[i]; /* Assign residue type */ ajStrAssignSubS(&pdbfile->rtype[i], pdbfile->lines[i], 17, 19); ajStrRemoveWhite(&pdbfile->rtype[i]); /* JONNEW */ if(ajStrMatchC(pdbfile->rtype[i], "HOH")) pdbfile->linetype[i]=PDBPARSE_COORDWAT; } } } else if(ajStrPrefixC(pdbfile->lines[i],"SEQRES")) { seqresfound = ajTrue; /* Read first SEQRES line */ ajStrAssignSubS(&seqres, pdbfile->lines[i], 14, 18); if(!ajFmtScanS(seqres, "%d", &seqreslen)) ajFmtPrintF(logf, "%-15s%d (%c)\n", "SEQRESLEN", pdbfile->nchains, ajStrGetCharPos(pdbfile->lines[i], 11)); ajStrAssignSubS(&seqres, pdbfile->lines[i], 19, 70); /* Append a ' ' in case this is missing from the PDB file, e.g. pdb1iie.ent */ ajStrAppendK(&seqres, ' '); ajChararrPut(&(pdbfile->chid), 0, (last_id=ajStrGetCharPos(pdbfile->lines[i], 11))); pdbfile->nchains++; /* Read subsequent SEQRES lines */ for(i++;inlines;i++) if(ajStrPrefixC(pdbfile->lines[i],"SEQRES")) { /* Still on same chain */ if(ajStrGetCharPos(pdbfile->lines[i], 11) == last_id) { ajStrAppendSubS(&seqres, pdbfile->lines[i], 19, 70); /* Append a ' ' in case this is missing from the PDB file, e.g. pdb1iie.ent */ ajStrAppendK(&seqres, ' '); } /* On new chain */ else { tmpseq=ajStrNew(); /* Process last chain */ if(!SeqresToSequence(seqres, &tmpseq, camask, &lenful)) { ajWarn("Sequence conversion error in" " FirstPass\nEmail jison@hgmp.mrc.ac.uk\n"); ajStrDel(&tmpseq); continue; } tmpseqful=ajStrNew(); ajStrAssignS(&tmpseqful, seqres); /* Check length of sequenece vs indicated length */ if(lenful != seqreslen) { ajFmtPrintF(logf, "%-15s%d (%c)\n", "SEQRESLENDIF", pdbfile->nchains, last_id); } /* Push sequences onto lists */ ajListstrPushAppend(listseqs, tmpseq); ajListstrPushAppend(listseqsful, tmpseqful); /* Read first SEQRES line of new chain */ ajStrAssignSubS(&seqres, pdbfile->lines[i], 14, 18); if(!ajFmtScanS(seqres, "%d", &seqreslen)) ajFmtPrintF(logf, "%-15s%d (%c)\n", "SEQRESLEN", pdbfile->nchains, ajStrGetCharPos(pdbfile->lines[i], 11)); ajStrAssignSubS(&seqres, pdbfile->lines[i], 19, 70); /* Append a ' ' in case this is missing from the PDB file, e.g. pdb1iie.ent */ ajStrAppendK(&seqres, ' '); ajChararrPut(&(pdbfile->chid), pdbfile->nchains, (last_id =ajStrGetCharPos(pdbfile->lines[i], 11))); /* if((last_id=ajStrGetCharPos(pdbfile->lines[i], 11))==' ') ajChararrPut(&(pdbfile->chid), pdbfile->nchains, '.'); else ajChararrPut(&(pdbfile->chid), pdbfile->nchains, last_id); */ pdbfile->nchains++; } } else { tmpseq=ajStrNew(); /* Process last chain */ if(!SeqresToSequence(seqres, &tmpseq, camask, &lenful)) { ajWarn("Sequence conversion error in " "FirstPass\nEmail jison@hgmp.mrc.ac.uk\n"); ajStrDel(&tmpseq); continue; } tmpseqful=ajStrNew(); ajStrAssignS(&tmpseqful, seqres); /* Check length of sequenece vs indicated length */ if(lenful != seqreslen) { ajFmtPrintF(logf, "%-15s%d (%c)\n", "SEQRESLENDIF", pdbfile->nchains, last_id); } /* Push sequences onto lists */ ajListstrPushAppend(listseqs, tmpseq); ajListstrPushAppend(listseqsful, tmpseqful); /* Convert lists to arrays in pdbfile object and delete list */ ajListstrToarray(listseqs, &(pdbfile->seqres)); ajListFree(&listseqs); ajListstrToarray(listseqsful, &(pdbfile->seqresful)); ajListFree(&listseqsful); /* i will get incremented in main loop */ i--; break; } } else if(ajStrPrefixC(pdbfile->lines[i],"COMPND")) { /* Read first COMPND line */ ajStrAssignSubS(&(pdbfile->compnd), pdbfile->lines[i], 10, 71); /* Read subsequent COMPND lines */ for(i++;inlines;i++) if(ajStrPrefixC(pdbfile->lines[i],"COMPND")) { ajStrAppendSubS(&(pdbfile->compnd), pdbfile->lines[i], 10, 71); } else { ajStrRemoveWhiteExcess(&(pdbfile->compnd)); /* i will get incremented in main loop */ i--; break; } } else if(ajStrPrefixC(pdbfile->lines[i],"SOURCE")) { /* Read first SOURCE line */ ajStrAssignSubS(&(pdbfile->source), pdbfile->lines[i], 10, 71); /* Read subsequent SOURCE lines */ for(i++;inlines;i++) if(ajStrPrefixC(pdbfile->lines[i],"SOURCE")) { ajStrAppendSubS(&(pdbfile->source), pdbfile->lines[i], 10, 71); } else { ajStrRemoveWhiteExcess(&(pdbfile->source)); /* i will get incremented in main loop */ i--; break; } } else if(ajStrPrefixC(pdbfile->lines[i],"TER")) { /* By default ok == ajTrue pdbfile->ok[i]=ajTrue; */ pdbfile->tercnt++; pdbfile->linetype[i]=PDBPARSE_TER; } else if(ajStrPrefixC(pdbfile->lines[i],"MODEL")) { pdbfile->modcnt++; pdbfile->linetype[i]=PDBPARSE_MODEL; if(!donefirstatom) { donefirstatom=ajTrue; pdbfile->idxfirst=i; } } else if(ajStrPrefixC(pdbfile->lines[i],"ENDMDL")) { pdbfile->linetype[i]=PDBPARSE_ENDMDL; } else if((!resolfound) && (ajStrPrefixC(pdbfile->lines[i], "REMARK"))) { /*Assign method and resolution */ ajFmtScanS(pdbfile->lines[i], "%*s %*d %S", &tmpstr); if(!ajStrCmpLenC(tmpstr, "RESOLUTION", 10)) { resolfound = ajTrue; if(isdigit((int)ajStrGetCharPos(pdbfile->lines[i], 23))) { if( (ajFmtScanS(pdbfile->lines[i], "%*s %*d %*s %f", &(pdbfile->reso))) != 1) ajFmtPrintF(logf, "%-15s\n", "RESOLUNK"); pdbfile->method=ajXRAY; } else { pdbfile->reso=0; pdbfile->method=ajNMR; } } } else if(ajStrPrefixC(pdbfile->lines[i],"HELIX")) { doneFirstStrand=ajFalse; elm = ElementNew(); ajStrAssignSubS(&tmpstr, pdbfile->lines[i], 7, 9); ajStrRemoveWhite(&tmpstr); ajFmtScanS(tmpstr, "%d", &elm->elementNum); ajStrAssignSubS(&elm->elementId, pdbfile->lines[i], 11, 13); ajStrRemoveWhite(&elm->elementId); elm->elementType = 'H'; ajStrAssignSubS(&elm->initResName, pdbfile->lines[i], 15, 17); ajStrRemoveWhite(&elm->initResName); ajStrAssignSubS(&elm->initSeqNum, pdbfile->lines[i], 21, 25); ajStrRemoveWhite(&elm->initSeqNum); ajStrAssignSubS(&elm->endResName, pdbfile->lines[i], 27, 29); ajStrRemoveWhite(&elm->endResName); ajStrAssignSubS(&elm->endSeqNum, pdbfile->lines[i], 33, 37); ajStrRemoveWhite(&elm->endSeqNum); elm->chainId = ajStrGetCharPos(pdbfile->lines[i], 19); if(elm->chainId != ajStrGetCharPos(pdbfile->lines[i], 31)) { ajFmtPrintF(logf, "%-15s%c %c\n", "SECTWOCHN", elm->chainId, ajStrGetCharPos(pdbfile->lines[i], 31)); ElementDel(&elm); continue; } ajStrAssignSubS(&tmpstr, pdbfile->lines[i], 38, 39); ajStrRemoveWhite(&tmpstr); ajFmtScanS(tmpstr, "%d", &elm->helixClass); /* Check that all records are present and flag an error if they're not */ if(MAJSTRGETLEN(elm->initResName) && MAJSTRGETLEN(elm->initSeqNum) && MAJSTRGETLEN(elm->endResName) && MAJSTRGETLEN(elm->endSeqNum) && MAJSTRGETLEN(elm->elementId)) { ajListPushAppend(listelms, elm); } else { ajFmtPrintF(logf, "%-15s%d\n", "SECMISS", i+1); ElementDel(&elm); continue; } } else if(ajStrPrefixC(pdbfile->lines[i],"SHEET")) { elm = ElementNew(); ajStrAssignSubS(&tmpstr, pdbfile->lines[i], 7, 9); ajStrRemoveWhite(&tmpstr); ajFmtScanS(tmpstr, "%d", &elm->elementNum); ajStrAssignSubS(&elm->elementId, pdbfile->lines[i], 11, 13); ajStrRemoveWhite(&elm->elementId); if(!ajStrMatchS(elm->elementId, LastSheetId)) doneFirstStrand=ajFalse; elm->elementType = 'E'; ajStrAssignSubS(&elm->initResName, pdbfile->lines[i], 17, 19); ajStrRemoveWhite(&elm->initResName); ajStrAssignSubS(&elm->initSeqNum, pdbfile->lines[i], 22, 26); ajStrRemoveWhite(&elm->initSeqNum); ajStrAssignSubS(&elm->endResName, pdbfile->lines[i], 28, 30); ajStrRemoveWhite(&elm->endResName); ajStrAssignSubS(&elm->endSeqNum, pdbfile->lines[i], 33, 37); ajStrRemoveWhite(&elm->endSeqNum); elm->chainId = ajStrGetCharPos(pdbfile->lines[i], 21); if(elm->chainId != ajStrGetCharPos(pdbfile->lines[i], 32)) { ajFmtPrintF(logf, "%-15s%c %c\n", "SECTWOCHN", elm->chainId, ajStrGetCharPos(pdbfile->lines[i], 32)); ElementDel(&elm); continue; } /* Check for beta-barrels - where the first and last strands are identical requiring us to ignore the last strand */ if(doneFirstStrand) { if(ajStrMatchS(elm->initResName, FirstStrand->initResName) && ajStrMatchS(elm->endResName, FirstStrand->endResName) && ajStrMatchS(elm->initSeqNum, FirstStrand->initSeqNum) && ajStrMatchS(elm->endSeqNum, FirstStrand->endSeqNum)) { ElementDel(&elm); continue; } } /* Check that all records are present and flag an error if they're not */ if(MAJSTRGETLEN(elm->initResName) && MAJSTRGETLEN(elm->initSeqNum) && MAJSTRGETLEN(elm->endResName) && MAJSTRGETLEN(elm->endSeqNum) && MAJSTRGETLEN(elm->elementId)) { ajListPushAppend(listelms, elm); } else { ajFmtPrintF(logf, "%-15s%d\n", "SECMISS", i+1); ElementDel(&elm); continue; } ajStrAssignS(&LastSheetId, elm->elementId); FirstStrand=elm; doneFirstStrand=ajTrue; } else if(ajStrPrefixC(pdbfile->lines[i],"TURN")) { doneFirstStrand=ajFalse; elm = ElementNew(); ajStrAssignSubS(&tmpstr, pdbfile->lines[i], 7, 9); ajStrRemoveWhite(&tmpstr); ajFmtScanS(tmpstr, "%d", &elm->elementNum); ajStrAssignSubS(&elm->elementId, pdbfile->lines[i], 11, 13); ajStrRemoveWhite(&elm->elementId); elm->elementType = 'T'; ajStrAssignSubS(&elm->initResName, pdbfile->lines[i], 15, 17); ajStrRemoveWhite(&elm->initResName); ajStrAssignSubS(&elm->initSeqNum, pdbfile->lines[i], 20, 24); ajStrRemoveWhite(&elm->initSeqNum); ajStrAssignSubS(&elm->endResName, pdbfile->lines[i], 26, 28); ajStrRemoveWhite(&elm->endResName); ajStrAssignSubS(&elm->endSeqNum, pdbfile->lines[i], 31, 35); ajStrRemoveWhite(&elm->endSeqNum); elm->chainId = ajStrGetCharPos(pdbfile->lines[i], 19); if(elm->chainId != ajStrGetCharPos(pdbfile->lines[i], 30)) { ajFmtPrintF(logf, "%-15s%c %c\n", "SECTWOCHN", elm->chainId, ajStrGetCharPos(pdbfile->lines[i], 30)); ElementDel(&elm); continue; } /* Check that all records are present and flag an error if they're not */ if(MAJSTRGETLEN(elm->initResName) && MAJSTRGETLEN(elm->initSeqNum) && MAJSTRGETLEN(elm->endResName) && MAJSTRGETLEN(elm->endSeqNum) && MAJSTRGETLEN(elm->elementId)) { ajListPushAppend(listelms, elm); } else { ajFmtPrintF(logf, "%-15s%d\n", "SECMISS", i+1); ElementDel(&elm); continue; } } } /* Write array in Elements structure */ (*elms)->n=ajListToarray(listelms, (void ***) &(*elms)->elms); /* Generate diagnostics and set defaults */ if((ajStrGetLen(pdbfile->compnd) == 0)) { ajStrAssignC(&pdbfile->compnd, "."); ajFmtPrintF(logf, "%-15s\n", "NOCOMPND"); } if((ajStrGetLen(pdbfile->source) == 0)) { ajStrAssignC(&pdbfile->source, "."); ajFmtPrintF(logf, "%-15s\n", "NOSOURCE"); } if((pdbfile->method==ajNMR) && (pdbfile->modcnt==0)) ajFmtPrintF(logf, "%-15s\n", "NOMODEL"); if(pdbfile->reso && pdbfile->modcnt) { ajFmtPrintF(logf, "%-15s\n", "RESOLMOD"); pdbfile->method=ajNMR; } /* Every pdb file is considered to have at least one model */ if(pdbfile->modcnt==0) { pdbfile->modcnt=1; pdbfile->nomod=ajTrue; } if(!resolfound) { pdbfile->reso=0; pdbfile->method=ajNMR; ajFmtPrintF(logf, "%-15s\n","NORESOLUTION"); } if(!seqresfound) { ajWarn("No SEQRES record found in raw pdb file"); ajFmtPrintF(logf, "%-15s\n", "NOSEQRES"); /* Free memory and return */ ajListFree(&listelms); ajStrDel(&LastSheetId); ajStrDel(&tmpstr); ajStrDel(&seqres); return ajFalse; } if(!donefirstatom) { ajWarn("No ATOM record found in raw pdb file"); ajFmtPrintF(logf, "%-15s\n", "NOATOM"); /* Free memory and return */ ajListFree(&listelms); ajStrDel(&LastSheetId); ajStrDel(&tmpstr); ajStrDel(&seqres); return ajFalse; } AJCNEW0(pdbfile->nligands, pdbfile->nchains); AJCNEW0(pdbfile->nres, pdbfile->nchains); AJCNEW0(pdbfile->numHelices, pdbfile->nchains); AJCNEW0(pdbfile->numStrands, pdbfile->nchains); AJCNEW0(pdbfile->numSheets, pdbfile->nchains); AJCNEW0(pdbfile->numTurns, pdbfile->nchains); AJCNEW0(pdbfile->chainok, pdbfile->nchains); AJCNEW0(pdbfile->resn1ok, pdbfile->nchains); for(i=0;inchains;i++) { pdbfile->chainok[i]=ajTrue; pdbfile->resn1ok[i]=ajTrue; } /* Free memory and return */ ajStrDel(&LastSheetId); ajListFree(&listelms); ajStrDel(&tmpstr); ajStrDel(&seqres); return ajTrue; } /* @funcstatic SeqresToSequence ******************************************** ** ** Reads a string containing a SEQRES sequence (e.g. "ALA ALA LEU" ) and ** writes a string containing a normal sequence (e.g. "AAL"). ** ** @param [r] seqres [const AjPStr] SEQRES sequence ** @param [w] seq [AjPStr *] Output sequence ** @param [r] camask [AjBool] Whether to ignore residues which do not ** have a C-alpha atom, these are defined as ACE, FOR and NH2 groups. ** @param [w] len [ajint *] Length of sequence INCLUDING ACE, FOR and ** NH2 groups. ** ** @return [AjBool] ajTrue on success, ajFalse otherwise. ** @@ ****************************************************************************/ static AjBool SeqresToSequence(const AjPStr seqres, AjPStr *seq, AjBool camask, ajint *len) { const AjPStr aa3 =NULL; char aa1; ajint nrem=0; /* No. 'residues' that were removed */ /* Check args */ if(!seqres || !seq) { ajWarn("Bad args passed to SeqresToSequence\n"); return ajFalse; } /* Allocate memory */ if((aa3=ajStrParseC(seqres, " \n"))) /* Parse seqres string */ do { if(ajStrMatchC(aa3, "FOR") || ajStrMatchC(aa3, "ACE") || ajStrMatchC(aa3, "NH2")) { if(camask) { nrem++; continue; } } ajResidueFromTriplet(aa3, &aa1); ajStrAppendK(seq, aa1); } while((aa3=ajStrParseC(NULL, " \n"))); else return ajFalse; *len = ajStrGetLen(*seq)+nrem; return ajTrue; } /* @funcstatic CheckChains ************************************************* ** ** Reads a Pdbfile object and checks whether chains from the SEQRES records ** (i) use unique chain ids, (ii) do not use an id of a space (' ') alongside ** non-space chain ids and (iii) contain at least the user-defined threshold ** number of amino acid residues. If any of these conditions are not met then ** the chain is discarded (chainok array is set to ajFalse). ** ** The chainok array is written. ** ** ** @param [w] pdbfile [AjPPdbfile] Pdbfile object ** @param [u] logf [AjPFile] Pointer to log file (build ** diagnostics). ** @param [r] min_chain_size [ajint] Minimum number of amino acids in ** a chain. ** ** @return [AjBool] True if SEQRES records contained at least one protein ** chain, False otherwise. ** @@ ****************************************************************************/ static AjBool CheckChains(AjPPdbfile pdbfile, AjPFile logf, ajint min_chain_size) { ajint i=0; /* Loop counter */ ajint j=0; /* Loop counter */ AjIStr iter=NULL; /* Iterator for sequence strings */ ajint aacnt=0; /* Counter for no. of amino acids in sequence strings */ AjBool ok=ajFalse; /* Flag which is True if amino acid chains are found in the SEQRES records */ char id1=' '; /* Chain id */ char id2=' '; /* Chain id */ AjBool iderr=ajFalse; /* ajTrue if both a space and a character are used as chain id's in the same file */ /* Check args */ if(!pdbfile || !logf) { ajWarn("Bad args passed to CheckChains\n"); return ajFalse; } /* Report problems with chain id's */ for(i=0;inchains; i++) { id1 = ajChararrGet(pdbfile->chid, i); for(j=i+1; jnchains; j++) { if(id1==(id2=ajChararrGet(pdbfile->chid, j))) { ajFmtPrintF(logf, "%-15s%d (%c) %d (%c)\n", "CHAINIDS", i+1, ajChararrGet(pdbfile->chid, i), j+1, ajChararrGet(pdbfile->chid, j)); pdbfile->chainok[i] = ajFalse; pdbfile->chainok[j] = ajFalse; } if((((id1 == ' ')&&(id2 != ' '))||((id2 == ' ')&&(id1 != ' '))) && (!(iderr))) { ajFmtPrintF(logf, "%-15s\n", "CHAINIDSPC"); iderr=ajTrue; } } } /* Report problems with non-protein chains */ for(i=0;inchains; i++) { if(!pdbfile->chainok[i]) continue; aacnt=0; iter=ajStrIterNew(pdbfile->seqres[i]); if(toupper((int) ajStrIterGetK(iter)) != 'X') ++aacnt; while(ajStrIterNext(iter)) if(toupper((int) ajStrIterGetK(iter)) != 'X') if(++aacnt >= min_chain_size) break; ajStrIterDel(&iter); if(aacnt==0) { ajFmtPrintF(logf, "%-15s%d (%c)\n", "SEQRESNOAA", i+1, ajChararrGet(pdbfile->chid, i)); pdbfile->chainok[i] = ajFalse; } else if(aacnt < min_chain_size) { ajFmtPrintF(logf, "%-15s%d (%c)\n", "SEQRESFEWAA", i+1, ajChararrGet(pdbfile->chid, i)); pdbfile->chainok[i] = ajFalse; } else { ok = ajTrue; } } /* Return now if no protein chains are found */ if(!ok) { ajWarn("No protein chains found in raw pdb file"); ajFmtPrintF(logf, "%-15s\n", "NOPROTEINS"); return ajFalse; } return ajTrue; } /* @funcstatic CheckTer **************************************************** ** ** Reads a Pdbfile object and checks whether the expected number of TER ** and MODEL records are present. Any unwanted records (e.g. TER records that ** delimit fragments of chain digests and duplicate MODEL records) are ** discared (the linetype array for the lines are set to PDBPARSE_IGNORE). ** ** The linetype array and modcnt variable may be modified. The toofewter ** element is written. ** The value of modcnt is reduced by 1 for each MODEL record that was masked ** but this is not done for tercnt. ** ** @param [w] pdbfile [AjPPdbfile] Pdbfile object ** @param [u] logf [AjPFile] Pointer to log file (build diagnostics) ** ** @return [AjBool] True on success, False otherwise. ** @@ ****************************************************************************/ static AjBool CheckTer(AjPPdbfile pdbfile, AjPFile logf) { AjBool toomany=ajFalse; AjBool toofew =ajFalse; ajint i=0; /* Loop counter */ AjPStr aa1=NULL; AjPStr aa2=NULL; char aa=' '; /* Check args */ if(!pdbfile || !logf) { ajWarn("Bad args passed to CheckTer\n"); return ajFalse; } /* Allocate memory */ aa1=ajStrNew(); aa2=ajStrNew(); /* Report problems with TER records */ if(!pdbfile->tercnt) ajFmtPrintF(logf, "%-15s\n", "TERNONE"); else { if(pdbfile->method==ajNMR) { if(pdbfile->tercnt > (pdbfile->nchains * pdbfile->modcnt)) toomany=ajTrue; else if(pdbfile->tercnt < (pdbfile->nchains * pdbfile->modcnt)) toofew=ajTrue; } else { if(pdbfile->tercnt > pdbfile->nchains) toomany=ajTrue; else if(pdbfile->tercnt < pdbfile->nchains) toofew=ajTrue; } } /* Report diagnostics */ if(toomany) ajFmtPrintF(logf, "%-15s\n", "TERTOOMANY"); else if(toofew) { ajFmtPrintF(logf, "%-15s\n", "TERTOOFEW"); pdbfile->toofewter=ajTrue; } /* Mask out the extra TER records */ if(toomany) { for(i=pdbfile->idxfirst;inlines;i++) { /* This is a TER record. Mask it out if it is flanked by ATOM or HETATM records for AMINO ACIDS and with identical chain ids. */ if(pdbfile->linetype[i]==PDBPARSE_TER) { if((pdbfile->linetype[i-1]==PDBPARSE_COORD) && (pdbfile->linetype[i+1]==PDBPARSE_COORD) && (ajStrGetCharPos(pdbfile->lines[i-1], POS_CHID) ==ajStrGetCharPos(pdbfile->lines[i+1], POS_CHID))) { ajStrAssignSubS(&aa1, pdbfile->lines[i-1], 17, 19); ajStrAssignSubS(&aa2, pdbfile->lines[i+1], 17, 19); if((ajResidueFromTriplet(aa1, &aa)) && (ajResidueFromTriplet(aa2, &aa))) { pdbfile->linetype[i]=PDBPARSE_IGNORE; } } } } } /* Check for duplicate MODEL records */ for(i=pdbfile->idxfirst+1; inlines;i++) { if((pdbfile->linetype[i-1]==PDBPARSE_MODEL) && (pdbfile->linetype[i]==PDBPARSE_MODEL)) { pdbfile->linetype[i-1]=PDBPARSE_IGNORE; pdbfile->modcnt--; ajFmtPrintF(logf, "%-15s%d\n", "MODELDUP", i+1); } } /*Tidy up and return */ ajStrDel(&aa1); ajStrDel(&aa2); return ajTrue; } /* @funcstatic NumberChains ************************************************ ** ** Reads a Pdbfile object and assigns each ATOM or HETATM record to a specific ** chain and model. Lines containing coordinates for water molecules and ** other non-protein groups ("heterogens") are identified. Water molecules are ** uniquely associated with a whole model whereas other non-protein groups are ** associated with a unique chain if possible. If this is not possible they ** are assigned a unique group number. ** ** The modn, chnn and gpn arrays are written. The linetype array is modified. ** The nligands array (count of groups associated with a chain), ngroups ** element (count of groups not associated with a chain) and gpid (identifiers ** of these later groups) are written. ** ** linetype array ** The linetype array is written with a value of PDBPARSE_COORDWAT for lines ** containing coordinates for water, and to PDBPARSE_COORDHET or ** PDBPARSE_COORDGP for non-protein groups that, respectively, could or could ** not be uniquely associated with a chain. ** ** ** @param [w] pdbfile [AjPPdbfile] Pdbfile object ** @param [u] logf [AjPFile] Log file (build diagnostics) ** ** @return [AjBool] True on success, False otherwise ** @@ ****************************************************************************/ static AjBool NumberChains(AjPPdbfile pdbfile, AjPFile logf) { ajint i=0; /* Loop counter */ ajint j=0; /* Loop counter */ ajint mod=0; /* Model number */ char id=' '; /* Chain id */ AjBool done=ajFalse; /*True if we have assigned a chain id for this line */ ajint this=0; /* Chain number of last line read in */ ajint chn=0; /* Chain number as index (starting from 0) */ AjPInt gpns=NULL; /* Gives the correct group number for groups that could not be identified as belonging to a chain, in cases where a single chain only is present in the file */ ajint gpn=0; /* Current group number */ ajint offset=0; /* Offset for finding correct value for gpns (for use with files with a single chain only */ AjBool *chndone=NULL; /* Array whose elements are TRUE if we have already read a line in belonging to the appropriate chain for this model*/ AjPStr *htype; /* Array holding the residue type of the last heterogen read in for the appropriate chain */ /* Check args */ if(!pdbfile || !logf) { ajWarn("Bad args passed to NumberChains\n"); return ajFalse; } /* Allocate memory */ gpns = ajIntNew(); AJCNEW0(htype, pdbfile->nchains); for(i=0;inchains; i++) htype[i]=ajStrNew(); AJCNEW0(chndone, pdbfile->nchains); for(i=0;inchains; i++) chndone[i]=ajFalse; if(((pdbfile->method==ajNMR) && pdbfile->nomod) || (pdbfile->method==ajXRAY)) mod=1; for(i=pdbfile->idxfirst;inlines;i++) { /* JONNEW Lines may already have been assigned to PDBPARSE_COORDWAT in FirstPass function, so we need to check here to ensure model number gets assigned */ if((pdbfile->linetype[i]==PDBPARSE_COORD) || (pdbfile->linetype[i]==PDBPARSE_COORDWAT)) { pdbfile->modn[i]=mod; /*Assign appropriate chain number to any ATOM or HETATM line which has an id exhibited in the SEQRES records. */ done=ajFalse; id=ajStrGetCharPos(pdbfile->lines[i], POS_CHID); for(j=0;jnchains; j++) { if(ajChararrGet(pdbfile->chid, j) == id) { pdbfile->chnn[i]=j+1; chn=pdbfile->chnn[i]-1; this=j+1; if(chndone[this-1]) { /* Mark up water coordintes */ if(ajStrMatchC(pdbfile->rtype[i], "HOH")) pdbfile->linetype[i]=PDBPARSE_COORDWAT; else { /* Mark up ligand coordinates */ pdbfile->linetype[i]=PDBPARSE_COORDHET; /* New heterogen */ if(!ajStrMatchS(htype[chn], pdbfile->rtype[i])) { offset++; pdbfile->nligands[chn]++; pdbfile->gpn[i] = pdbfile->nligands[chn]; ajStrAssignS(&htype[chn], pdbfile->rtype[i]); } /* More atoms of the same heterogen */ else { pdbfile->gpn[i] = pdbfile->nligands[chn]; } } } /* Unused else doneter=ajFalse; */ done=ajTrue; break; } } if(!done) { /* Any ATOM or HETATM record with a whitspace as chain id and which has not already been assigned belongs to the chain immediately preceeding it. Assign these lines as NON_PROTEIN_CHAIN lines. */ if(id==' ') { /* This won't be set until we've read in at least one coordinate line for protein chain, so if this==0, set it to 1 (first chain). This prevents problems for 1qjh.pxyz*/ if(this==0) this=1; pdbfile->chnn[i]=this; chn=pdbfile->chnn[i]-1; /* Mark up water coordintes */ if(ajStrMatchC(pdbfile->rtype[i], "HOH")) pdbfile->linetype[i]=PDBPARSE_COORDWAT; else { /* Mark up ligand coordinates */ pdbfile->linetype[i]=PDBPARSE_COORDHET; /* New ligand */ if(!ajStrMatchS(htype[chn], pdbfile->rtype[i])) { offset++; pdbfile->nligands[chn]++; pdbfile->gpn[i] = pdbfile->nligands[chn]; ajStrAssignS(&htype[chn], pdbfile->rtype[i]); } /* More atoms of the same heterogen */ else { pdbfile->gpn[i] = pdbfile->nligands[chn]; } } } else /* Assign any ATOM or HETATM records with a non- whitespace chain id that does not appear in the SEQRES records as a NON_PROTEIN_CHAIN. Record the chain id's used and assign a GROUP NUMBER as appropriate.*/ { /* Mark up water coordintes */ if(ajStrMatchC(pdbfile->rtype[i], "HOH")) pdbfile->linetype[i]=PDBPARSE_COORDWAT; else { /* If there is a single chain only then the group is of course associated with that chain */ if(pdbfile->nchains==1) { /* Mark up ligand coordinates */ pdbfile->linetype[i]=PDBPARSE_COORDHET; pdbfile->chnn[i]=1; for(done=ajFalse, j=0;jngroups; j++) if(ajChararrGet(pdbfile->gpid, j) == id) { pdbfile->gpn[i]=ajIntGet(gpns, j); done=ajTrue; break; } if(!done) { ajIntPut(&gpns, j, (gpn=j+1+offset)); pdbfile->gpn[i]=gpn; /* NO - this code would be used only if the group could not be associated with a chain. ajChararrPut(&(pdbfile->gpid), pdbfile->ngroups, id); pdbfile->ngroups++; */ /* Increment number of ligands and reset last ligand type read in */ pdbfile->nligands[0]++; ajStrAssignC(&htype[0], " "); } } else { /* Mark up ligand coordinates */ pdbfile->linetype[i]=PDBPARSE_COORDGP; for(done=ajFalse, j=0;jngroups; j++) if(ajChararrGet(pdbfile->gpid, j) == id) { pdbfile->gpn[i]=j+1; done=ajTrue; break; } if(!done) { ajChararrPut(&(pdbfile->gpid), pdbfile->ngroups, id); pdbfile->ngroups++; pdbfile->gpn[i]=j+1; } } } } } } else if(pdbfile->linetype[i]==PDBPARSE_MODEL) { mod++; /* doneter=ajFalse; Unused variable */ for(j=0;jnchains; j++) chndone[j]=ajFalse; /* doneoneter=ajFalse; Unused variable */ } else if(pdbfile->linetype[i]==PDBPARSE_TER) { chndone[this-1]=ajTrue; /* prev = this; Unused variable */ /* doneoneter=ajTrue; Unused variable */ /* doneter=ajTrue; Unused variable */ } /* Check for missing TER records. Where chains are not separated by TER records (the chain id changes from line to line without an intervening TER record and both chain id's are not whitespace). Where ATOM and HETATM groups are not separated by TER records (a chain id is given on one line, a whitspace chain id is given on the next line, and there is no intervening TER record). This code is identical to a fragment from CheckTer. Note that chndone only is modified. If the code in the function was made to use prev, doneoneter, doneter then the code below would also have to be modified Only do this now where there aren't enough TER records in the file. Without this check, it was failing for cases where the order of chains is inconsistent (see around line 4095 of pdb1cm4.ent) */ if(pdbfile->toofewter) { if(i>pdbfile->idxfirst) if(pdbfile->linetype[i-1]==PDBPARSE_COORD && ((pdbfile->linetype[i]==PDBPARSE_COORD)|| (pdbfile->linetype[i]==PDBPARSE_COORDHET)) && ((ajStrGetCharPos(pdbfile->lines[i-1], POS_CHID)) != ajStrGetCharPos(pdbfile->lines[i], POS_CHID))) { for(j=0;jnchains; j++) if(ajChararrGet(pdbfile->chid, j) ==ajStrGetCharPos(pdbfile->lines[i-1], POS_CHID)) { chndone[j]=ajTrue; break; } } } } /* The above code cannot cope for cases where the ATOM and HETATM records use the same (or no) chain identifier and are not separated by a TER record (e.g. 1rbp) For files with less than the expected number of TER records, check again for COORDHET lines, which are identified as (i) a line beginning with a HETATM record with the same chain identifier but lower residue number than the preceeding line, or JONNEW (ii) a line beginning with a HETATM record which is not followed anywhere in the file by an ATOM record with the same chain identifier (from the PDB record) or number (assigned by parser) */ for(i=0;inchains;i++) ajStrAssignC(&htype[i], "\0"); if(pdbfile->tercnt < (pdbfile->nchains * pdbfile->modcnt)) for(i=pdbfile->idxfirst+1;inlines;i++) { chn=pdbfile->chnn[i]-1; if(pdbfile->linetype[i-1]==PDBPARSE_COORD && pdbfile->linetype[i]==PDBPARSE_COORD && ((ajStrGetCharPos(pdbfile->lines[i-1], POS_CHID)) == ajStrGetCharPos(pdbfile->lines[i], POS_CHID))) if(ajStrPrefixC(pdbfile->lines[i],"HETATM")) if((pdbfile->resn1[i]resn1[i-1]) || NoMoreAtoms(pdbfile, i)) /* if(pdbfile->resn1[i]resn1[i-1]) */ while((ajStrPrefixC(pdbfile->lines[i],"HETATM"))) { if(ajStrMatchC(pdbfile->rtype[i], "HOH")) pdbfile->linetype[i]=PDBPARSE_COORDWAT; else { pdbfile->linetype[i]=PDBPARSE_COORDHET; /* New heterogen */ if(!ajStrMatchS(htype[chn], pdbfile->rtype[i])) { offset++; pdbfile->nligands[chn]++; pdbfile->gpn[i] = pdbfile->nligands[chn]; ajStrAssignS(&htype[chn], pdbfile->rtype[i]); } /* More atoms of the same heterogen */ else { pdbfile->gpn[i] = pdbfile->nligands[chn]; } } i++; } } /* For files with a single chain oly, set the number of groups that could not be associated with a chain to zero */ if(pdbfile->nchains==1) { /* We might need to add ngroups to nligands[0] */ if(pdbfile->ngroups != 0) ajFatal("Must check ngroups versus nligands in the file"); pdbfile->ngroups = 0; } /* Tidy up and return */ for(i=0;inchains; i++) ajStrDel(&htype[i]); AJFREE(htype); ajIntDel(&gpns); AJFREE(chndone); return ajTrue; } /* @funcstatic NoMoreAtoms ************************************************ ** ** This function is called by function to identify ligands ** (COORDHET lines) in files with less than the expected number of TER ** records. ** These are identified here by a line beginning with a HETATM record which ** is not followed by an ATOM record with the same chain identifier (from ** the PDB record) or number (assigned by parser). The function returns if ** a line of a different chain is found or at the first non-ATOM/HETAM line. ** Additional processing is done in itself. ** ** @param [w] pdbfile [AjPPdbfile] Pdbfile object ** @param [r] linen [ajint] Line number ** ** @return [AjBool] True (no more atoms), False otherwise ** @@ ****************************************************************************/ static AjBool NoMoreAtoms(AjPPdbfile pdbfile, ajint linen) { ajint i=0; for(i=linen+1;inlines;i++) { if((ajStrPrefixC(pdbfile->lines[i],"ATOM"))) { /* Same chain */ if((ajStrGetCharPos(pdbfile->lines[linen], POS_CHID) == ajStrGetCharPos(pdbfile->lines[i], POS_CHID)) || (pdbfile->chnn[linen] == pdbfile->chnn[i])) return ajFalse; else /* Different chain */ return ajTrue; } else if((ajStrPrefixC(pdbfile->lines[i],"HETATM"))) { /* Different chain */ if((ajStrGetCharPos(pdbfile->lines[linen], POS_CHID) != ajStrGetCharPos(pdbfile->lines[i], POS_CHID)) || (pdbfile->chnn[linen] != pdbfile->chnn[i])) return ajTrue; } else /* Different chain or near EOF */ return ajTrue; } return ajTrue; } /* @funcstatic MaskChains ***************************************************** ** ** Reads a Pdbfile object and checks to see whether the ATOM records for ** each chain contain sufficient amino acids. Any chains with insufficient ** amino acids either in the SEQRES or ATOM records, or with ambiguous chain ** id's are discarded. Optionally, amino acid residues and non-amino ** acid groups (e.g. ACE, NH2 etc) in protein chains with no CA atom are also ** discarded (the linetype array for the lines are set to PDBPARSE_IGNORE). ** For non-amino acid groups, the corresponding characters are removed from ** the sequence derived from the SEQRES records. Coordinate data and atom ** type are parsed for each atom. Optionally, amino acids or groups in ** protein chains with a single atom only are also discarded. ** ** ** Checks whether chains from the ATOM records contain at least the ** user-defined threshold number of amino acid residues. If not then the chain ** is discarded (chainok array is set to ajFalse). If NO chains with ** sufficient residues are found, a "NOPROTEINS" error is generated and ** ajFalse is returned. ** ** Writes the x,y,z,o,b and atype elements of a Pdbfile object. The linetype, ** and possibly seqres, seqresful and nres arrays are modified. ** ** linetype array ** Coordinate data are extracted for lines of linetype PDBPARSE_COORD, ** PDBPARSE_COORDHET and PDBPARSE_COORDGP. ** ** seqres & seqresful arrays ** Three-letter codes of any groups that are (i) not standard amino acids and ** (ii) which do not contain a CA atom are removed from the seqres sequence ** if the is set. The seqresful array is an intermediate array to ** achieve this. ** ** @param [w] pdbfile [AjPPdbfile] Pdbfile object ** @param [u] logf [AjPFile] Log file (build diagnostics) ** @param [r] min_chain_size [ajint] Min. no. of amino acids in a chain ** @param [r] camask [AjBool] Whether to mask non-amino acid ** residues within protein chains which ** do not have a C-alpha atom (remove them ** from the seqres sequence and set the ** linetype array for the lines ** to PDBPARSE_IGNORE). ** @param [r] camask1 [AjBool] Whether to mask amino acid residues ** within protein chains which do not have ** a C-alpha atom (set the linetype ** array for the lines to PDBPARSE_IGNORE). ** @param [r] atommask [AjBool] Whether to mask residues or groups ** with a single atom only. ** ** @return [AjBool] True on success, False otherwise ** @@ ****************************************************************************/ static AjBool MaskChains(AjPPdbfile pdbfile, AjPFile logf, ajint min_chain_size, AjBool camask, AjBool camask1, AjBool atommask) { ajint i=0; /*Loop counter */ ajint j=0; /*Loop counter */ AjPStr aa3=NULL; /*Amino acid */ ajint rcnt=0; /* Residue count */ ajint acnt=0; /* Atom count */ ajint modcnt=0; /*Count of MODEL records */ ajint lastatm=0; /*Line number of last coordinate line read in*/ ajint firstatm=0; /*Line number of coordinate line for first atom of residue */ AjBool noca=ajFalse; /*True if this residue does not contain a CA atom */ ajint lastchn=0; /*Chain number of last line read in*/ AjBool *chainok; /*Array of flags which are True if a chain in the SEQRES records is found in the ATOM records */ char aa1=' '; /*Amino acid id */ AjPStr lastrn=NULL; /*Number of last residue read in */ AjBool msgdone=ajFalse; /* Flag for message reporting */ AjPStr sub=NULL; AjPStr tmpseq=NULL; ajint lenful=0; /* Length of SEQRES sequence including ACE, FOR & NH2 groups that might be discarded by the call to SeqresToSequence */ ajuint ipos = 0; char tmp=' '; AjBool odd=ajFalse; /* Whether the current residue / group is of unknown type */ AjBool ok =ajFalse; /* True if the file, after processing by this function, is found to contain at least one chain for which chainok == ajTrue */ /* Check args */ if(!pdbfile || !logf) { ajWarn("Bad args passed to MaskChains\n"); return ajFalse; } /* Allocate memory */ AJCNEW0(chainok, pdbfile->nchains); for(i=0;inchains;i++) chainok[i]=ajFalse; aa3=ajStrNew(); lastrn=ajStrNew(); sub=ajStrNew(); ajStrAssignClear(&sub); firstatm=lastatm=pdbfile->idxfirst; for(i=pdbfile->idxfirst;inlines;i++) { if((pdbfile->linetype[i]==PDBPARSE_COORD) || (pdbfile->linetype[i]==PDBPARSE_COORDHET) || (pdbfile->linetype[i]==PDBPARSE_COORDGP) || (pdbfile->linetype[i]==PDBPARSE_COORDWAT)) { /* Assign x, y, z, o, b data */ /* Replace this with code so that internals of structure are not probed (when code becomes available)*/ if( (sscanf(&(pdbfile->lines[i]->Ptr[28]), "%f %f %f %f", &(pdbfile->x[i]), &(pdbfile->y[i]), &(pdbfile->z[i]), &(pdbfile->o[i]))) != 4) ajFatal("Scan error in MaskChains\n" "Email jison@hgmp.mrc.ac.uk"); if(!sscanf(&(pdbfile->lines[i]->Ptr[60]), "%f", &(pdbfile->b[i]))) ajFatal("Scan error in MaskChains\n" "Email jison@hgmp.mrc.ac.uk"); /* Usually position 12 is used for the alternative position indicator (taken in the code below to be indicated by a number) for atoms, but occasionally can be incorrectly used for the atom type itself (indicated in the code below by a character in pos 12). This code copes for both cases */ /* Assign atom type */ if(isalpha((int)pdbfile->lines[i]->Ptr[12])) { ajStrAssignSubS(&pdbfile->atype[i], pdbfile->lines[i], 12, 15); ajStrRemoveWhite(&pdbfile->atype[i]); if(!msgdone) { ajFmtPrintF(logf, "%-15s%d\n", "ATOMCOL12", i+1); msgdone=ajTrue; } } else { ajStrAssignSubS(&pdbfile->atype[i], pdbfile->lines[i], 13, 15); ajStrRemoveWhite(&pdbfile->atype[i]); } } if(pdbfile->linetype[i]==PDBPARSE_COORD) { /* Check whether CA atom is present */ if(!(ajStrCmpC(pdbfile->atype[i], "CA"))) noca=ajFalse; if(pdbfile->chnn[i]chnn[i]!=lastchn) { rcnt=0; lastchn=pdbfile->chnn[i]; } /* If this is a new residue */ if(!ajStrMatchS(pdbfile->pdbn[i], lastrn)) { /* Mask coordinate lines where there are only a single atom */ if(acnt==1) { ajFmtPrintF(logf, "%-15s%d\n", "ATOMONEONLY", lastatm+1); if(atommask) { pdbfile->linetype[lastatm]=PDBPARSE_IGNORE; } } /* Mask coordinate lines for residues lacking a CA atom */ if(noca) { odd = (!(ajResidueFromTriplet(pdbfile->rtype[lastatm-1], &tmp))); if((camask && odd) || (camask1 && !odd)) for(j=firstatm; j<=lastatm; j++) pdbfile->linetype[j]=PDBPARSE_IGNORE; /* Remove residues from SEQRES records */ if((camask && odd)) { ipos = pdbfile->chnn[firstatm] - 1; ajStrExchangeSS(&pdbfile->seqresful[ipos], pdbfile->rtype[firstatm], sub); } if(firstatm == lastatm) { ajFmtPrintF(logf, "%-15s%d\n", "ATOMNOCA", firstatm+1); } else ajFmtPrintF(logf, "%-15s%d %d\n", "ATOMNOCA", firstatm+1, lastatm+1); } /* Increment the residue counter if the code is recognised */ if(ajResidueFromTriplet(pdbfile->rtype[i], &aa1)) rcnt++; if(rcnt>=min_chain_size) chainok[pdbfile->chnn[i]-1]=ajTrue; ajStrAssignS(&lastrn, pdbfile->pdbn[i]); /* Set count of atoms to zero, set the position of the first atom and set flag for recognising CA atom*/ acnt=1; firstatm=i; if(!(ajStrCmpC(pdbfile->atype[i], "CA"))) noca=ajFalse; else noca=ajTrue; } /* Set the position for the last atom read in */ lastatm=i; /* Increment the atom counter */ acnt++; } else if(pdbfile->linetype[i]==PDBPARSE_MODEL) { rcnt=0; lastchn=0; modcnt++; if(modcnt!=1) { for(j=0;jnchains;j++) { /* Only bother reporting error messages if a message about the SEQRES records not containing enough aa's has not already been reported */ /* If pdbfile->chainok is False, leave it so */ if(!pdbfile->chainok[j]) continue; else { if(!chainok[j]) { pdbfile->chainok[j]=chainok[j]; ajFmtPrintF(logf, "%-15s%d (%c) %d\n", "ATOMFEWAA", j+1, (ajChararrGet(pdbfile->chid, j)), modcnt); } } } } } } /* Ensure that C-terminal residues are masked if necessary */ /* else if( (pdbfile->linetype[i]==PDBPARSE_TER)|| (pdbfile->linetype[i]==PDBPARSE_ENDMDL)) {*/ if(noca) { odd = (!(ajResidueFromTriplet(pdbfile->rtype[lastatm-1],&tmp))); if((camask && odd) || (camask1 && !odd)) for(j=firstatm; j<=lastatm; j++) pdbfile->linetype[j]=PDBPARSE_IGNORE; /* Remove residues from SEQRES records */ if((camask && odd)) { ipos = pdbfile->chnn[firstatm] - 1; ajStrExchangeSS(&pdbfile->seqresful[ipos], pdbfile->rtype[firstatm], sub); } if(firstatm == lastatm) { ajFmtPrintF(logf, "%-15s%d\n", "ATOMNOCA", firstatm+1); } else ajFmtPrintF(logf, "%-15s%d %d\n", "ATOMNOCA", firstatm+1, lastatm+1); } /* } */ /* Write the new (masked) seqres sequences if necessary */ if(camask) { for(i=0;inchains;i++) { tmpseq=ajStrNew(); if(!SeqresToSequence(pdbfile->seqresful[i], &tmpseq, camask, &lenful)) { ajFatal("Sequence conversion error in " "FirstPass\nEmail jison@hgmp.mrc.ac.uk\n"); } ajStrAssignS(&pdbfile->seqres[i], tmpseq); pdbfile->nres[i]=ajStrGetLen(pdbfile->seqres[i]); ajStrDel(&tmpseq); } } /* Set modcnt to 1 for xray structures */ if(!modcnt) modcnt=1; /*Check for xray structures or last model of nmr structures */ for(i=0;inchains;i++) { /* Only bother reporting error messages if a message about the SEQRES records not containing enough aa's has not already been reported */ /* If pdbfile->chainok is False, leave it so */ if(!pdbfile->chainok[i]) continue; else { if(!chainok[i]) { pdbfile->chainok[i]=chainok[i]; ajFmtPrintF(logf, "%-15s%d (%c) %d\n", "ATOMFEWAA", i+1,(ajChararrGet(pdbfile->chid, i)),modcnt); } } } /* Mask out any chains with insufficient amino acids either in the SEQRES or ATOM records */ for(i=pdbfile->idxfirst;inlines;i++) if(((pdbfile->linetype[i]==PDBPARSE_COORD) || (pdbfile->linetype[i]==PDBPARSE_COORDHET)) && (!pdbfile->chainok[pdbfile->chnn[i]-1])) { pdbfile->linetype[i]=PDBPARSE_IGNORE; } /* Check for missing TER records. Where chains are not separated by TER records (the chain id changes from line to line without an intervening TER record and both chain id's are not whitespace). Where ATOM and HETATM groups are not separated by TER records (a chain id is given on one line, a whitspace chain id is given on the next line, and there is no intervening TER record) */ for(i=pdbfile->idxfirst+1; inlines;i++) { if((pdbfile->linetype[i-1]==PDBPARSE_COORD) && (pdbfile->linetype[i]==PDBPARSE_COORD) && pdbfile->chnn[i-1] != pdbfile->chnn[i]) ajFmtPrintF(logf, "%-15s%d %d\n", "TERMISSCHN", i, i+1); else if((pdbfile->linetype[i-1]==PDBPARSE_COORD) && ((pdbfile->linetype[i]==PDBPARSE_COORDHET) || (pdbfile->linetype[i]==PDBPARSE_COORDWAT))&& pdbfile->chnn[i-1] == pdbfile->chnn[i]) ajFmtPrintF(logf, "%-15s%d %d\n", "TERMISSHET", i, i+1); } /* Tidy up */ AJFREE(chainok); ajStrDel(&aa3); ajStrDel(&lastrn); ajStrDel(&sub); /* Report problems with non-protein chains */ for(i=0;inchains; i++) if(pdbfile->chainok[i]) { ok = ajTrue; break; } /* Return now if no protein chains are found */ if(!ok) { ajWarn("No protein chains found in raw pdb file"); ajFmtPrintF(logf, "%-15s\n", "NOPROTEINS"); return ajFalse; } return ajTrue; } /* @funcstatic StandardiseNumbering **************************************** ** ** Reads a Pdbfile object and standardises the two sets of residue numbers ** (resn1 & resn2 arrays) derived from the raw residue numbers. The residue ** numbering is corrected for zero or negative residue numbers, non-standard ** numbering schemes and any other cases of non-sequentiality (e.g. where the ** next residue number is lower than the previous one, see 1pca). ** resn1 gives the sequence presuming an alternative numbering scheme, resn2 ** gives the sequence presuming heterogeneity. Heterogeneity is indicated by ** a character in position lines[26] (the same position used to indicate ** alternative residue numbering schemes). ** ** The resn1 & resn2 arrays of a Pdbfile object are modified. The oddnum ** array is written. ** ** oddnum array ** This is an array of Bool's which are TRUE for duplicate residues of ** heterogenous positions (e.g. if 2 different residues are both numbered '8' ** or one is '8' and the other '8A' for example then would be set ** True for the second residue. ** ** @param [w] pdbfile [AjPPdbfile] Pdbfile object ** @param [u] logf [AjPFile] Pointer to log file (build diagnostics) ** ** @return [AjBool] True on success, False otherwise ** @@ ****************************************************************************/ static AjBool StandardiseNumbering(AjPPdbfile pdbfile, AjPFile logf) { ajint i=0; ajint lastchn=-1; /* Chain number of last line read in */ AjBool first=ajFalse; /* True if we have processed the first residue in a chain */ ajint first_num=0; /* Number of first residue in chain */ ajint modrn=0; /* Corrected first residue number */ AjBool neg=ajFalse; /* True if first residue number is negative */ AjBool zer=ajFalse; /* True if first residue number is zero */ AjBool report_neg=ajFalse; /* True if we have reported an error that a residue number is negative for this chain*/ AjBool report_zer=ajFalse; /* True if we have reported an error that a residue number is zero for this chain*/ ajint add=0; /* An amount to add to the residue numbers to correct them */ AjBool ignore=ajFalse; AjBool odd=ajFalse; ajint rn=0; /* Current residue number */ ajint last_rn=0; /* Last residue number read in */ ajint this_rn=0; /* Current residue number read in */ char last=' '; /* Chain id of last chain */ char curr=' '; /* Chain id of current chain */ AjPStr last_rt=NULL; /* Type of previous residue */ AjPStr this_rt=NULL; /* Type of current residue */ AjBool report_nonstd=ajFalse; /* True if we have reported an error that a non-standard residue numbering scheme is used for this chain */ AjBool report_nonsqt=ajFalse; /* True if we have reported an error that any other cases of non-sequential numbering are found for this this chain */ char aa1=' '; /* Amino acid single character code */ ajuint ipos = 0; last_rt=ajStrNew(); this_rt=ajStrNew(); /* Check args */ if(!pdbfile || !logf) { ajWarn("Bad args passed to StandardiseNumbering\n"); return ajFalse; } /* Check whether the integer part of the original pdb numbering (at this point in code held in resn1 and resn2) gives the correct index into the SEQRES sequence */ for(i=pdbfile->idxfirst;inlines;i++) { /* If model number has gone past 1 then we must have checked all the chains, so break */ if(pdbfile->modn[i] > 1) break; if(pdbfile->linetype[i]!=PDBPARSE_COORD) continue; /* If residue number is not negative, zero, or greater then the length of the SEQRES sequence and if the residue matches then continue */ if((pdbfile->resn1[i] <= pdbfile->nres[pdbfile->chnn[i]-1])|| (pdbfile->resn1[i] >= 1)) { ajResidueFromTriplet(pdbfile->rtype[i],&aa1); ipos = pdbfile->chnn[i] - 1; if(aa1 == ajStrGetCharPos(pdbfile->seqres[ipos], pdbfile->resn1[i]-1)) continue; } /* Otherwise flag an error for this chain and move to the end of the chain */ ajFmtPrintF(logf, "%-15s%d (%c)\n", "BADINDEX", pdbfile->chnn[i], ajChararrGet(pdbfile->chid,pdbfile->chnn[i]-1)); for(lastchn=pdbfile->chnn[i]; inlines;i++) { if(pdbfile->linetype[i]!=PDBPARSE_COORD) continue; if(pdbfile->modn[i] > 1) break; if(pdbfile->chnn[i] != lastchn) { i--; break; } } } /* Fix for zero or negative residue numbers. This is done for both resn1 and resn2 arrays of a Pdbfile object */ for(first=ajFalse, i=pdbfile->idxfirst;inlines;i++) { /* Co-ordinate line */ if(pdbfile->linetype[i]==PDBPARSE_COORD) { /* New chain */ if(pdbfile->chnn[i]!=lastchn) { neg=ajFalse; zer=ajFalse; report_neg=ajFalse; report_zer=ajFalse; first=ajFalse; lastchn=pdbfile->chnn[i]; } rn=pdbfile->resn1[i]; if(!first) { first_num = rn; if(first_num > 0) { /* Advance counter to next chain */ for( ;inlines;i++) if(((pdbfile->linetype[i]== PDBPARSE_COORD)&&(pdbfile->chnn[i]!=lastchn)) || pdbfile->linetype[i]==PDBPARSE_MODEL) { neg=ajFalse; zer=ajFalse; report_neg=ajFalse; report_zer=ajFalse; first=ajFalse; lastchn=-1; break; } /* i will get incremented in main loop above */ i--; continue; } first=ajTrue; } if(rn < 0) { neg=ajTrue; if(zer) modrn=rn-(first_num-1); else /* if(neg && !zer) */ modrn=rn-(first_num-1); if(!report_neg) { ajFmtPrintF(logf, "%-15s%d (%c) %d\n", "NEGNUM", pdbfile->chnn[i], ajChararrGet(pdbfile->chid, (pdbfile->chnn[i]-1)),i+1); report_neg=ajTrue; } } else if (rn == 0) { zer=ajTrue; if(neg) modrn=rn-(first_num-1); else /* if(!neg) */ modrn=rn+1; if(!report_zer) { ajFmtPrintF(logf, "%-15s%d (%c) %d\n", "ZERNUM", pdbfile->chnn[i], ajChararrGet(pdbfile->chid, (pdbfile->chnn[i]-1)),i+1); report_zer=ajTrue; } } else /*rn is (+ve)*/ { if(!neg && zer) modrn=rn+1; else if(neg && zer) modrn=rn-(first_num-1); else /* if(neg && !zer) */ modrn=rn-(first_num); } pdbfile->resn1[i]=modrn; pdbfile->resn2[i]=modrn; } /* New model */ else if(pdbfile->linetype[i]==PDBPARSE_MODEL) { neg=ajFalse; zer=ajFalse; report_neg=ajFalse; report_zer=ajFalse; first=ajFalse; lastchn=-1; } } /* Fix non-standard residue numbering scheme. This is done for resn1 array of a Pdbfile object only. */ for(lastchn=-1, i=pdbfile->idxfirst;inlines;i++) { /* Co-ordinate line */ if(pdbfile->linetype[i]==PDBPARSE_COORD) { /* New chain */ if(pdbfile->chnn[i]!=lastchn) { add = 0; first=ajFalse; ignore=ajFalse; lastchn=pdbfile->chnn[i]; report_nonstd=ajFalse; } if(!first) { /*Remove the chmyotrypsin numbering code*/ last_rn = pdbfile->resn1[i]; last=ajStrGetCharPos(pdbfile->lines[i], 26); first=ajTrue; continue; } rn = pdbfile->resn1[i]; curr=ajStrGetCharPos(pdbfile->lines[i], 26); if(curr!=last) { if(rn == last_rn) { add++; ignore=ajTrue; } } if(rn != last_rn) { ignore = ajFalse; } last=curr; last_rn = rn; pdbfile->resn1[i]=rn+add; if(ignore) { pdbfile->oddnum[i]=ajTrue; if(!report_nonstd) { ajFmtPrintF(logf, "%-15s%d (%c) %d\n", "ODDNUM", pdbfile->chnn[i], ajChararrGet(pdbfile->chid, (pdbfile->chnn[i]-1)),i+1); report_nonstd=ajTrue; } } } else if(pdbfile->linetype[i]==PDBPARSE_MODEL) { add = 0; first=ajFalse; ignore=ajFalse; lastchn=-1; report_nonstd=ajFalse; } } /* Fix remaining non-sequential residue numbering in resn1 array of Pdbfile object*/ for(lastchn=-1, i=pdbfile->idxfirst;inlines;i++) { /* Co-ordinate line */ if(pdbfile->linetype[i]==PDBPARSE_COORD) { /* New chain */ if(pdbfile->chnn[i]!=lastchn) { add = 0; first=ajFalse; ignore=ajFalse; lastchn=pdbfile->chnn[i]; report_nonsqt=ajFalse; } if(!first) { last_rn=pdbfile->resn1[i]; ajStrAssignS(&last_rt, pdbfile->rtype[i]); first=ajTrue; continue; } this_rn=pdbfile->resn1[i]; ajStrAssignS(&this_rt, pdbfile->rtype[i]); /* A new residue is indicated if this ATOM is 'N' or if this is a different residue type*/ if( !(ajStrCmpC(pdbfile->atype[i], "N")) || !(ajStrMatchS(this_rt, last_rt))) { /* Check for duplicate residue numbers */ if(this_rn == last_rn) { add++; ignore=ajTrue; odd=ajTrue; } /* Check for drops in residue numbers, see 1pca */ if(this_rn < last_rn) { add+=(last_rn-this_rn+1); ignore=ajTrue; odd=ajFalse; } } if(this_rn > last_rn) ignore = ajFalse; pdbfile->resn1[i]=this_rn+add; ajStrAssignS(&last_rt, this_rt); last_rn = this_rn; if(ignore) { if(odd) pdbfile->oddnum[i]=ajTrue; if(!report_nonsqt) { ajFmtPrintF(logf, "%-15s%d (%c) %d\n", "NONSQNTL", pdbfile->chnn[i], ajChararrGet(pdbfile->chid, (pdbfile->chnn[i]-1)),i+1); report_nonsqt=ajTrue; } } } else if(pdbfile->linetype[i]==PDBPARSE_MODEL) { add = 0; first=ajFalse; ignore=ajFalse; lastchn=-1; report_nonsqt=ajFalse; } } /* Fix remaining non-sequential residue numbering in resn2 array of Pdbfile object (duplicate lines for presumed heterogenous residues positions are ignored)*/ for(lastchn=-1, i=pdbfile->idxfirst;inlines;i++) { /* Co-ordinate line */ if((pdbfile->linetype[i]==PDBPARSE_COORD) && (!pdbfile->oddnum[i])) { /* New chain */ if(pdbfile->chnn[i]!=lastchn) { add = 0; first=ajFalse; lastchn=pdbfile->chnn[i]; } if(!first) { last_rn=pdbfile->resn2[i]; ajStrAssignS(&last_rt, pdbfile->rtype[i]); first=ajTrue; continue; } this_rn=pdbfile->resn2[i]; ajStrAssignS(&this_rt, pdbfile->rtype[i]); /* A new residue is indicated if this ATOM is 'N' or if this is a different residue type*/ if(!(ajStrCmpC(pdbfile->atype[i], "N")) || !(ajStrMatchS(this_rt, last_rt))) { /* Check for duplicate residue numbers */ if(this_rn == last_rn) { add++; } /* Check for drops in residue numbers, see 1pca */ if(this_rn < last_rn) { add+=(last_rn-this_rn+1); } } pdbfile->resn2[i]=this_rn+add; ajStrAssignS(&last_rt, this_rt); last_rn = this_rn; } else if (pdbfile->linetype[i]==PDBPARSE_MODEL) { add = 0; first=ajFalse; lastchn=-1; } } /* Tidy up and return */ ajStrDel(&last_rt); ajStrDel(&this_rt); return ajTrue; } /* @funcstatic AlignNumbering ************************************************* ** ** Reads a Pdbfile object and determines for each chain a set of residue ** numbers (the resni array) that give the correct index into the full length ** (SEQRES) sequence for residues listed in the ATOM records. ** ** The resni and resn1ok arrays of a Pdbfile object are written. ** The seqres and nres elements may be modified for any missing N-terminal ** residues. ** ** resn1ok array ** This array contains Bool's for each chain which are TRUE if resn1 was ** used to derive resni, i.e. gave correct alignment to the full-length ** (SEQRES) sequence. If False then resn2 was used ( resn1 gives the ** sequence presuming an alternative numbering scheme, resn2 gives the ** sequence presuming heterogeneity). ** ** @param [w] pdbfile [AjPPdbfile] Pdbfile object ** @param [u] logf [AjPFile] Pointer to log file (build diagnostics) ** @param [r] lim [ajuint] Max. no. permissible mismatches between ** the ATOM & SEQRES sequences. ** @param [r] lim2 [ajuint] Max. no. residues to trim when checking ** for missing C-terminal SEQRES residues. ** ** @return [AjBool] True on success, False otherwise ** @@ ** Must comment what diagnostics this writes! ** This now writes its diagnostics to logf, rather than tempfile. ****************************************************************************/ static AjBool AlignNumbering(AjPPdbfile pdbfile, AjPFile logf, ajuint lim, ajuint lim2) { /* Sequence and residue number data are given for each unique chain (data for the first model only is considered when assigning residue numbers) */ /* Rather than use, e.g. seq1, seq2 & seq, we could use a single 2 dimensional array, but we would need new code for 2d arrays of chararacters*/ ajuint a=0; /* Loop counter */ ajuint b=0; /* Loop counter */ ajuint maxb=0; /* Max value of b */ ajint i=0; /* Loop counter */ ajint j=0; /* Loop counter */ ajint k=0; /* Loop counter */ ajint x=0; /* Loop counter */ ajint y=0; /* Loop counter */ ajint z=0; /* Loop counter */ AjPStr *seq1=NULL; /* Sequences of residues from ATOM records (all residues) */ AjPStr *seq2=NULL; /* Sequences of residues from ATOM records (excluding residues for which oddnum array in Pdbfile object is True) */ AjPStr *seq=NULL; /* Pointer to seq1 or seq2 */ AjPStr seqbit=NULL; /* Subsequence of seq (real copy) */ ajint lenseqbit=0; /* Length of seqbit */ ajint *nres1=NULL; /* No. residues for seq1/arr1 */ ajint *nres2=NULL; /* No. residues for seq2/arr2 */ ajint *nres=NULL; /* Pointer to nres1 or nres2 */ AjPInt *num1=NULL; /* Residue numbers for seq1 (from resn1 element of the Pdbfile object) */ AjPInt *num2=NULL; /* Residue numbers for seq2 (from resn2 element of the Pdbfile object) */ AjPInt *num=NULL; /* Pointer to num1 or num2 */ AjPInt *idx=NULL; /* Gives correct index into seqres sequence (from Pdbfile object) for the current sequence. These are residue numbers and therefore idx would have a value of 1 for the first seqres residue.*/ AjPInt *idx_full=NULL; /* As idx but with empty array elements replacing missing residues so that we can index into idx_full using residue numbers from num */ ajint last1=-1000; /* Number of last residue for seq1/arr1*/ ajint last2=-1000; /* Number of last residue for seq2/arr2*/ char aa1=' '; /* Amino acid single character code*/ ajint c=0; /* No. of current chain */ AjBool done=ajFalse; /* True if we have found the correct residue numbering */ char *insert=NULL; /* String from N-terminus of ATOM sequence to insert at N-terminus of SEQRES sequence in case of the later missing residues */ AjPStr tmpseqres=NULL; /* Temp. string for seqres sequence from Pdbfile object */ AjPStr bit=NULL; /* Temp. string for a bit of sequence */ ajuint nmismatches=0; /* No. of mismatches between ATOM and SEQRES sequence */ ajint loc=0; /* Location of ATOM sequence in SEQRES sequence (if applicable) */ ajint len=0; /* Length of seqres sequence from Pdbfile object*/ AjBool err=ajFalse; /* True if a residue number from the ATOM records would cause an array boundary error in the seqres sequence */ ajint siz_substr=0; /* Size of substring for alignment of ATOM and SEQRES sequences */ const char *atm_ptr=NULL; /* Pointer to ATOM sequence */ const char *seqres_ptr=NULL;/* Pointer to SEQRES sequence */ const char *loc_ptr=NULL; /* Pointer for location of match of substring to SEQRES sequence*/ AjPStr substr=NULL; /* Substring of ATOM sequence */ AjPStr substr2=NULL; /* Substring of ATOM sequence */ ajint atm_idx=0; /* Index into ATOM sequence */ ajint seqres_idx=0; /* Index into SEQRES sequence */ ajint seqres_idx_last=0; /* Index into SEQRES sequence for C-terminal residue of substring*/ char aa_last=' '; /* Amino acid residue code of C-terminal residue of substring*/ AjBool fixed=ajFalse; /* Whether the mismatch residue of the substring was later aligned correctly */ AjBool done_end=ajFalse; /* True if we have aligned the terminus of the ATOM sequence */ AjBool founderr=ajFalse; /* Match of substring of ATOM sequence to SEQRES found with potential mismatched residue */ AjPStr msgstr=NULL; /* A string to hold a message */ AjPStr msgbit=NULL; /* A temp. string to hold part of a message */ ajint idx_misfit_atm=0; /* Index into ATOM sequence (seq) for first residue that does not match SEQRES sequence */ ajint idx_misfit_seqres =0;/* Index into SEQRES sequence for first residue that does not match ATOM sequence */ AjPStr aa_misfit=NULL; /* Original (PDB) residue number for first residue mismatch between ATOM and SEQRES sequences */ ajint this_num = 0; /* Current residue number */ /*DIAGNOSTIC ajint max = 0; */ /* Used in diagnostics code */ /* Check args */ if(!pdbfile || !logf) { ajWarn("Bad args passed to AlignNumbering\n"); return ajFalse; } /* Allocate memory for arrays etc*/ aa_misfit=ajStrNew(); msgstr=ajStrNew(); msgbit=ajStrNew(); seqbit=ajStrNew(); insert=ajCharNewRes(MAXMISSNTERM); tmpseqres=ajStrNew(); bit=ajStrNew(); substr=ajStrNew(); substr2=ajStrNew(); AJCNEW0(seq1, pdbfile->nchains); AJCNEW0(seq2, pdbfile->nchains); AJCNEW0(num1, pdbfile->nchains); AJCNEW0(num2, pdbfile->nchains); AJCNEW0(idx, pdbfile->nchains); AJCNEW0(idx_full, pdbfile->nchains); AJCNEW0(nres1, pdbfile->nchains); AJCNEW0(nres2, pdbfile->nchains); for(i=0;inchains; i++) { if(!pdbfile->chainok[i]) continue; seq1[i]=ajStrNew(); seq2[i]=ajStrNew(); num1[i]=ajIntNew(); num2[i]=ajIntNew(); } /* Assign arrays */ for(i=pdbfile->idxfirst;inlines;i++) { /* Co-ordinate line */ if(pdbfile->linetype[i]==PDBPARSE_COORD) { /* Break if we are no longer in the first model */ if(pdbfile->modn[i] != 1) break; else c=pdbfile->chnn[i]-1; /* ajFmtPrint("%S\n", pdbfile->lines[i]); */ if(pdbfile->resn1[i] != last1) { ajResidueFromTriplet(pdbfile->rtype[i],&aa1); ajStrAppendK(&seq1[c], aa1); ajIntPut(&num1[c], nres1[c],pdbfile->resn1[i]); last1=pdbfile->resn1[i]; nres1[c]++; } if((pdbfile->resn2[i] != last2) && (!pdbfile->oddnum[i])) { ajResidueFromTriplet(pdbfile->rtype[i],&aa1); ajStrAppendK(&seq2[c], aa1); ajIntPut(&num2[c], nres2[c],pdbfile->resn2[i]); last2=pdbfile->resn2[i]; nres2[c]++; } } } /* Allocate memory for arrays of residue numbers */ for(i=0;inchains; i++) { if(!pdbfile->chainok[i]) continue; /* Array must be big enough to cope with either sequence */ if(nres1[i] > nres2[i]) idx[i]=ajIntNewL(nres1[i]); else idx[i]=ajIntNewL(nres2[i]); /* Array must be big enough to cope with highest the residue number from either array */ if(ajIntGet(num1[i], nres1[i]-1) > ajIntGet(num2[i], nres2[i]-1)) { idx_full[i]=ajIntNewL(ajIntGet(num1[i], nres1[i]-1)+1); } else { idx_full[i]=ajIntNewL(ajIntGet(num2[i], nres2[i]-1)+1); } } /* Loop for each chain */ for(i=0;inchains; i++) { /* Skip this chain if necessary */ if(!(pdbfile->chainok[i])) continue; else ajStrAssignS(&tmpseqres, pdbfile->seqres[i]); /* Loop for checking for missing residues from N-term of SEQRES sequence */ for(done=ajFalse, j=0;jseqres[i]), tmpseqres); pdbfile->nres[i]=ajStrGetLen(pdbfile->seqres[i]); /* Modify the seqres sequence in the Pdbfile object by adding the first residues from the N-terminus of the ATOM sequence to the N-terminus of . */ for(k=0;(kseqres[i]), 0, insert); pdbfile->nres[i]=ajStrGetLen(pdbfile->seqres[i]); /* DIAGNOSTIC CODE ajFmtPrintF(logf, "\nChainMod %d" " (seq %d)\n%S\n%S\n\n\n", i+1,x+1, seq[i], pdbfile->seqres[i]); */ /***********************************************/ /******************* STEP 1 ********************/ /***********************************************/ /*DIAGNOSTIC ajFmtPrintF(logf, "STEP1 tmpseqres: %S\n", tmpseqres); ajFmtPrintF(logf, "chnn : %d\n" "seq1 : %S\n" "seq2 : %S\n" "seqr : %S\n", i+1, seq1[i], seq2[i], pdbfile->seqres[i]); ajFmtPrintF(logf, "\n"); if(ajStrMatchS(seq1[i], seq2[i])) ajFmtPrintF(logf, "seq1 and seq2 match\n"); else ajFmtPrintF(logf, "seq1 and seq2 DO NOT match\n"); if(ajStrMatchS(seq1[i], pdbfile->seqres[i])) ajFmtPrintF(logf, "seq1 and seqres match\n"); else ajFmtPrintF(logf, "seq1 and seqres DO NOT match\n"); ajFmtPrintF(logf, "\n"); */ /* Check whether sequences are identical length*/ if(nres[i]==pdbfile->nres[i]) { /* Sequences are identical - assign residue numbers 'by hand' */ if(ajStrMatchS(seq[i], pdbfile->seqres[i])) { for(k=0;kresn1ok[i]=ajTrue; else pdbfile->resn1ok[i]=ajFalse; done=ajTrue; /* DIAGNOSTIC ajFmtPrintF(logf, "STEP1 OK\n"); */ break; } /* Sequence are same length but contain mismatches */ else { for(ajStrAssignClear(&msgstr), nmismatches=0, k=0; kseqres[i], k)) { nmismatches++; /* Correct the seqres sequence. Replace this with appropriate library call once available so we don't have to probe the internals of the structure */ /* a will give the number of the first coordinate line for the mismatch residue from the ATOM records*/ a = PdbfileFindLine(pdbfile, i+1, x, ajIntGet(num[i], k)); /* Get the id of the mismatch residue in the SEQRES sequence. */ ajResidueToTriplet(pdbfile->seqres[i]->Ptr[k], &aa_misfit); /* To give correct index into SEQRES records in original PDB file, subtract j to account for modifications to the N-terminus that were made for missing residues relative to ATOM sequence. A further 1 is added to give a number starting from 1 (rather than 0)*/ ajFmtPrintS(&msgbit, "%S%S:%S%d. ", pdbfile->rtype[a], pdbfile->pdbn[a], aa_misfit, k-j+1); ajStrAppendS(&msgstr, msgbit); pdbfile->seqres[i]->Ptr[k]= ajStrGetCharPos(seq[i], k); } /* Sequences are same length (acceptable number of mismatches) */ if(nmismatches<=lim) { if(nmismatches) ajFmtPrintF(logf, "%-15s%d (%c) %d %S\n", "MISMATCH", i+1, ajChararrGet(pdbfile->chid, i), nmismatches, msgstr); for(k=0;kresn1ok[i]=ajTrue; else pdbfile->resn1ok[i]=ajFalse; done=ajTrue; /*DIAGNOSTIC ajFmtPrintF(logf, "STEP1 OK %d mismatches\n", nmismatches); */ break; } else { /* Otherwise, sequences are same length (unacceptable number of mismatches) Restore the original seqres sequence */ ajStrAssignS(&(pdbfile->seqres[i]), tmpseqres); pdbfile->nres[i]=ajStrGetLen(pdbfile->seqres[i]); /*DIAGNOSTIC ajFmtPrintF(logf, "STEP1 **NOT** OK %d mismatches\n", nmismatches); */ } } } /***********************************************/ /******************* STEP 2 ********************/ /***********************************************/ /*DIAGNOSTIC ajFmtPrintF(logf, "STEP2 tmpseqres: %S\n", tmpseqres); ajFmtPrintF(logf, "chnn : %d\n" "seq1 : %S\n" "seq2 : %S\n" "seqr : %S\n", i+1, seq1[i], seq2[i], pdbfile->seqres[i]); ajFmtPrintF(logf, "\n"); if(ajStrMatchS(seq1[i], seq2[i])) ajFmtPrintF(logf, "seq1 and seq2 match\n"); else ajFmtPrintF(logf, "seq1 and seq2 DO NOT match\n"); if(ajStrMatchS(seq1[i], pdbfile->seqres[i])) ajFmtPrintF(logf, "seq1 and seqres match\n"); else ajFmtPrintF(logf, "seq1 and seqres DO NOT match\n"); ajFmtPrintF(logf, "\n"); */ /* JONNEW The code below replaces 'ORIGINAL' code block */ maxb=lim2; if(maxb >= MAJSTRGETLEN(seq[i])) maxb = MAJSTRGETLEN(seq[i]) - 1; /*DIAGNOSTIC ajFmtPrintF(logf, "maxb = %d\nlim2 = %d\nstrlen: %d\n", maxb, lim2, MAJSTRGETLEN(seq[i]));*/ /* First pass through loop is full-length sequence */ for(b=0; b< maxb+1; b++) { lenseqbit = MAJSTRGETLEN(seq[i]) -b; ajStrAssignSubS(&seqbit, seq[i], 0, (lenseqbit -1)); ajStrAssignSubS(&bit, seq[i], (lenseqbit), -1); /*DIAGNOSTIC ajFmtPrintF(logf, "Trying ATOM substring %S\n" "versus SEQRES %S\n", seqbit, pdbfile->seqres[i]);*/ /* Check whether ATOM is substring of SEQRES sequence */ if((loc=ajStrFindS(pdbfile->seqres[i], seqbit))!=-1) { /* Check to ensure that the last substring residue is aligned to the last residue of the SEQRES residue, otherwise, problems would arise in cases where SEQRES sequence had C-terminal residues that were absent from the ATOM (& therefore also substring) sequence. */ if((loc+lenseqbit) != pdbfile->nres[i]) break; /* ATOM is substring of SEQRES sequence - assign residue numbers 'by hand' */ for(k=0;kresn1ok[i]=ajTrue; else pdbfile->resn1ok[i]=ajFalse; /* SEQRES sequence is missing C-terminal ATOM residues */ if(b) { ajFmtPrintF(logf, "%-15s%d (%c) %d\n", "MISSCTERM", i+1, ajChararrGet(pdbfile->chid, i),b); ajStrAppendS(&(pdbfile->seqres[i]), bit); pdbfile->nres[i]=ajStrGetLen(pdbfile->seqres[i]); } done=ajTrue; /*DIAGNOSTIC ajFmtPrintF(logf, "STEP2 OK\n"); */ break; } } if(done) break; else { /* Otherwise, ATOM is NOT a substring of SEQRES sequence */ /*DIAGNOSTIC ajFmtPrintF(logf, "STEP2 **NOT** OK\n"); */ } /* ORIGINAL if((loc=ajStrFindS(pdbfile->seqres[i], seq[i]))!=-1) { for(k=0;kresn1ok[i]=ajTrue; else pdbfile->resn1ok[i]=ajFalse; done=ajTrue; ajFmtPrintF(logf, "STEP2 OK\n"); break; } */ /***********************************************/ /******************* STEP 3 ********************/ /***********************************************/ /*DIAGNOSTIC ajFmtPrintF(logf, "STEP3 tmpseqres: %S\n", tmpseqres); ajFmtPrintF(logf, "chnn : %d\n" "seq1 : %S\n" "seq2 : %S\n" "seqr : %S\n", i+1, seq1[i], seq2[i], pdbfile->seqres[i]); ajFmtPrintF(logf, "\n"); if(ajStrMatchS(seq1[i], seq2[i])) ajFmtPrintF(logf, "seq1 and seq2 match\n"); else ajFmtPrintF(logf, "seq1 and seq2 DO NOT match\n"); if(ajStrMatchS(seq1[i], pdbfile->seqres[i])) ajFmtPrintF(logf, "seq1 and seqres match\n"); else ajFmtPrintF(logf, "seq1 and seqres DO NOT match\n"); ajFmtPrintF(logf, "\n"); */ /* Check whether SEQRES is substring of ATOM sequence */ /* This will only find omissions from the SEQRES sequence where the ATOM sequence would align to it wihout gaps, and where the SEQRES sequence does not have extra N-terminal residues relative to ATOM (such cases are caught in STEP 2) */ if((loc=ajStrFindS(seq[i],pdbfile->seqres[i])) !=-1) { /* SEQRES is substring of ATOM sequence - correct for residues missing from SEQRES sequence and assign residue numbers 'by hand' */ /* N-terminal insertion needed*/ if(loc!=0) { ajFmtPrintF(logf, "%-15s%d (%c) %d\n", "MISSNTERM", i+1, ajChararrGet(pdbfile->chid, i), loc); ajStrAssignSubS(&bit, seq[i], 0, loc-1); ajStrInsertS(&(pdbfile->seqres[i]), 0, bit); pdbfile->nres[i]=ajStrGetLen(pdbfile->seqres[i]); } /* C-terminal insertion needed */ if(pdbfile->nres[i]!=nres[i]) { ajFmtPrintF(logf, "%-15s%d (%c) %d\n", "MISSCTERM", i+1, ajChararrGet(pdbfile->chid, i), ( nres[i] - pdbfile->nres[i])); ajStrAssignSubS(&bit, seq[i], pdbfile->nres[i], nres[i]-1); ajStrAppendS(&(pdbfile->seqres[i]), bit); pdbfile->nres[i]=ajStrGetLen(pdbfile->seqres[i]); } for(k=0;kresn1ok[i]=ajTrue; else pdbfile->resn1ok[i]=ajFalse; /*DIAGNOSTIC ajFmtPrintF(logf, "STEP3 OK\n"); */ done=ajTrue; break; } /* Otherwise, SEQRES is NOT a substring of the ATOM sequence */ /* DIAGNOSTIC ajFmtPrintF(logf, "STEP3 **NOT** OK\n"); */ /***********************************************/ /******************* STEP 4 ********************/ /***********************************************/ /*DIAGNOSTIC ajFmtPrintF(logf, "STEP4.1 tmpseqres: %S\n", tmpseqres); ajFmtPrintF(logf, "chnn : %d\n" "seq1 : %S\n" "seq2 : %S\n" "seqr : %S\n", i+1, seq1[i], seq2[i], pdbfile->seqres[i]); ajFmtPrintF(logf, "\n"); if(ajStrMatchS(seq1[i], seq2[i])) ajFmtPrintF(logf, "seq1 and seq2 match\n"); else ajFmtPrintF(logf, "seq1 and seq2 DO NOT match\n"); if(ajStrMatchS(seq1[i], pdbfile->seqres[i])) ajFmtPrintF(logf, "seq1 and seqres match\n"); else ajFmtPrintF(logf, "seq1 and seqres DO NOT match\n"); ajFmtPrintF(logf, "\n"); ajFmtPrintF(logf, "STEP4.2 tmpseqres: %S\n", tmpseqres); */ /* Check whether residue numbering is correct (and count the number of mismatches) */ for(err=ajFalse, ajStrAssignClear(&msgstr), nmismatches=0, k=0;knres[i]) { if(ajStrGetCharPos(seq[i], k) != ajStrGetCharPos(pdbfile->seqres[i], this_num-1)) { nmismatches++; /* Correct the seqres sequence. Replace this with appropriate library call once available so we don't have to probe the internals of the structure */ /* a will give the number of the first coordinate line for the mismatch residue from the ATOM records */ a = PdbfileFindLine(pdbfile, i+1, x, ajIntGet(num[i], k)); /* Get the id of the mismatch residue in the SEQRES sequence. */ ajResidueToTriplet( pdbfile->seqres[i]->Ptr[this_num-1], &aa_misfit); ajFmtPrintS(&msgbit, "%S%S:%S%d. ", pdbfile->rtype[a], pdbfile->pdbn[a], aa_misfit, this_num-j); ajStrAppendS(&msgstr, msgbit); pdbfile->seqres[i]->Ptr[this_num-1]= ajStrGetCharPos(seq[i], k); } } else { err=ajTrue; break; } } /*DIAGNOSTIC ajFmtPrintF(logf, "STEP4.3 tmpseqres: %S\n", tmpseqres); */ if(!err) { /* Residue numbering is correct (no or acceptable number of mismatches) */ if(nmismatches<=lim) { if(nmismatches) ajFmtPrintF(logf, "%-15s%d (%c) %d %S\n", "MISMATCH", i+1, ajChararrGet(pdbfile->chid, i), nmismatches, msgstr); for(k=0;kresn1ok[i]=ajTrue; else pdbfile->resn1ok[i]=ajFalse; done=ajTrue; /*DIAGNOSTIC ajFmtPrintF(logf, "STEP4 OK %d mismatches\n", nmismatches); */ break; } else { /* Otherwise, residue numbering is incorrect (unacceptable number of mismatches) Restore the original seqres sequence*/ ajStrAssignS(&(pdbfile->seqres[i]), tmpseqres); pdbfile->nres[i]=ajStrGetLen(pdbfile->seqres[i]); /*DIAGNOSTIC ajFmtPrintF(logf, "STEP4 **NOT** OK %d mismatches\n", nmismatches); */ } } else { /* Otherwise, residue numbering is incorrect (residue number is out of range) Restore the original seqres sequence*/ ajStrAssignS(&(pdbfile->seqres[i]), tmpseqres); pdbfile->nres[i]=ajStrGetLen(pdbfile->seqres[i]); /*DIAGNOSTIC ajFmtPrintF(logf, "STEP4 **NOT** OK out_of_range\n"); */ } /***********************************************/ /******************* STEP 5 ********************/ /***********************************************/ /*DIAGNOSTIC ajFmtPrintF(logf, "STEP5 tmpseqres: %S\n", tmpseqres); ajFmtPrintF(logf, "chnn : %d\n" "seq1 : %S\n" "seq2 : %S\n" "seqr : %S\n", i+1, seq1[i], seq2[i], pdbfile->seqres[i]); ajFmtPrintF(logf, "\n"); if(ajStrMatchS(seq1[i], seq2[i])) ajFmtPrintF(logf, "seq1 and seq2 match\n"); else ajFmtPrintF(logf, "seq1 and seq2 DO NOT match\n"); if(ajStrMatchS(seq1[i], pdbfile->seqres[i])) ajFmtPrintF(logf, "seq1 and seqres match\n"); else ajFmtPrintF(logf, "seq1 and seqres DO NOT match\n"); ajFmtPrintF(logf, "\n"); */ /* None of the measures above could find the correct residue numbering so try by alignment. Align the SEQRES sequence to the ATOM sequence by taking progressively decreasing sized substrings from the ATOM sequence. It returns 0 if any of the alignments would not leave enough space to be able to align the rest of the ATOM sequence to the SEQRES sequence - i.e. alignments giving an overspill of the ATOM sequence past the C-terminus of the SEQRES sequence are NOT allowed. NO Mismatches are allowed at this stage */ for(done_end=ajFalse, len=pdbfile->nres[i], siz_substr=nres[i], atm_ptr=ajStrGetPtr(seq[i]), seqres_ptr=ajStrGetPtr(pdbfile->seqres[i]); siz_substr>0; ) { ajStrAssignSubC(&substr, atm_ptr, 0, siz_substr-1); if((loc_ptr = strstr(seqres_ptr, ajStrGetPtr(substr)))==NULL) { siz_substr--; continue; } else { atm_idx= (int) ((atm_ptr-ajStrGetPtr(seq[i])) /sizeof(char)); seqres_idx= (int) ((loc_ptr-ajStrGetPtr(pdbfile->seqres[i])) /sizeof(char)); /*CHECK TO SEE IF THERE IS SPACE TO FIT THE REMAINER OF THE ATOM SEQUENCE IN THE SEQRES SEQUENCE GIVEN THIS ALIGNMENT */ if((nres[i] - atm_idx) > (len - seqres_idx)) break; for(k=0, y=atm_idx, z=seqres_idx; kseqres[i]); ajFmtPrintF(logf, "\n"); if(ajStrMatchS(seq1[i], seq2[i])) ajFmtPrintF(logf, "seq1 and seq2 match\n"); else ajFmtPrintF(logf, "seq1 and seq2 DO NOT match\n"); if(ajStrMatchS(seq1[i], pdbfile->seqres[i])) ajFmtPrintF(logf, "seq1 and seqres match\n"); else ajFmtPrintF(logf, "seq1 and seqres DO NOT match\n"); ajFmtPrintF(logf, "\n"); */ /* Residue numbering is correct after alignment (no mismatches) */ if(x==0) pdbfile->resn1ok[i]=ajTrue; else pdbfile->resn1ok[i]=ajFalse; ajFmtPrintF(logf, "%-15s%d (%c)\n", "GAPPEDOK", i+1, ajChararrGet(pdbfile->chid, i)); /*DIAGNOSTIC ajFmtPrintF(logf, "STEP5 OK\n"); */ done=ajTrue; break; } /*DIAGNOSTIC ajFmtPrintF(logf, "STEP5 **NOT** OK\n"); */ /* Otherwise, agreement could not be found */ /* array might contain junk values now but this should not matter as the array should be overwritten */ /***********************************************/ /******************* STEP 6 ********************/ /***********************************************/ /*DIAGNOSTIC ajFmtPrintF(logf, "STEP6 tmpseqres: %S\n", tmpseqres); ajFmtPrintF(logf, "chnn : %d\n" "seq1 : %S\n" "seq2 : %S\n" "seqr : %S\n", i+1, seq1[i], seq2[i], pdbfile->seqres[i]); ajFmtPrintF(logf, "\n"); if(ajStrMatchS(seq1[i], seq2[i])) ajFmtPrintF(logf, "seq1 and seq2 match\n"); else ajFmtPrintF(logf, "seq1 and seq2 DO NOT match\n"); if(ajStrMatchS(seq1[i], pdbfile->seqres[i])) ajFmtPrintF(logf, "seq1 and seqres match\n"); else ajFmtPrintF(logf, "seq1 and seqres DO NOT match\n"); ajFmtPrintF(logf, "\n"); */ /* Try again by alignment. Mismatches ARE allowed at this stage Must change: */ for(ajStrAssignClear(&msgstr), nmismatches=0, done_end=ajFalse, len=pdbfile->nres[i], siz_substr=nres[i], atm_ptr=ajStrGetPtr(seq[i]), seqres_idx_last=-1, seqres_ptr=ajStrGetPtr(pdbfile->seqres[i]); siz_substr>0; ) { /* On the first pass, atm_ptr and seqres_ptr point to the start of the ATOM and SEQRES sequences respectively. */ founderr=ajFalse; /* Copy block (of progressively decreasing size) from ATOM sequence to substring1 */ ajStrAssignSubC(&substr, atm_ptr, 0, siz_substr-1); /*DIAGNOSTIC ajFmtPrintF(logf, "\n***\n%12s\n%s\n%12s\n%s\n", "Aligning ", ajStrGetPtr(substr), "To ", seqres_ptr); */ /* Set loc_ptr to point to the first occurence of substring1 in SEQRES sequence */ /* If substring1 does not occur in SEQRES sequence */ if( (loc_ptr = strstr(seqres_ptr, ajStrGetPtr(substr)))==NULL) { /*See if there is an error in the residue id at the start of Create a new substring from the ATOM records but omit the first character. There is a specical case if substr is only 1 character long.*/ if(siz_substr == 1) { loc_ptr = seqres_ptr; nmismatches++; founderr=ajTrue; } else { /* Copy substring1 to substring2 but omit the first character */ ajStrAssignSubS(&substr2, substr, 1, -1); /*DIAGNOSTIC ajFmtPrintF(logf, "\n!!!\n%12s\n%s\n%12s\n%s\n", "Aligning ", ajStrGetPtr(substr2), "To ", seqres_ptr); */ /* Set loc_ptr to point to the first occurence of substring1 in SEQRES sequence */ /* If substring2 does not occur in the SEQRES sequence, continue with a smaller substring */ loc_ptr = strstr(seqres_ptr, ajStrGetPtr(substr2)); if(!loc_ptr) { siz_substr--; continue; } /* substring2 is found in the SEQRES sequence */ else { /* If there is not enough space to accomodate the 'missing' residue continue (with a smaller substring) */ if(loc_ptr == seqres_ptr) { siz_substr--; continue; } /*DIAGNOSTIC ajFmtPrintF(logf, "\n\n\nnmismatches = " "%d\n%12s%s\n%12s%s\n%12s%s\n%12s%s\n", nmismatches, "atm_ptr:", atm_ptr, "seqres_ptr:", seqres_ptr, "substr:", substr, "substr2", substr2); */ /*DIAGNOSTIC ajFmtPrintF(logf, "MISMATCH FOUND OK\n"); */ /* There is enough space to accommodate substring2 and the 'missing' (mismatch) residue */ nmismatches++; founderr=ajTrue; } } } /*DIAGNOSTIC else ajFmtPrintF(logf, "ALIGNMENT FOUND OK\n"); */ /* atm_idx and seqres_idx are set to give the index into ATOM and SEQRES sequences respectively for the position of match of N-terminal residue of substring (if founderr is True this will be the position of the N-terminal mismatch residue) */ atm_idx= (int) ((atm_ptr-ajStrGetPtr(seq[i]))/sizeof(char)); if(founderr) seqres_idx= (int) (((loc_ptr-1)- ajStrGetPtr(pdbfile->seqres[i])) /sizeof(char)); else seqres_idx= (int) ((loc_ptr- ajStrGetPtr(pdbfile->seqres[i])) /sizeof(char)); /*DIAGNOSTIC ajFmtPrintF(logf, "seqres_idx : %d\n", seqres_idx); */ /* If there was a mismatch residue, idx_misfit_atm and idx_misfit_seqres will give the index into the ATOM and SEQRES sequences respectively for its position */ if(founderr) { idx_misfit_atm = atm_idx; idx_misfit_seqres = seqres_idx; } /*CHECK TO SEE IF THERE IS SPACE TO FIT THE REMAINDER OF THE ATOM SEQUENCE IN THE SEQRES SEQUENCE GIVEN THIS ALIGNMENT */ if((nres[i] - atm_idx) > (len - seqres_idx)) break; /**************************************************/ /* This will have to change for 1st residue */ /**************************************************/ /* Try and find an exact match within the gap for the mismatch residue */ fixed = ajFalse; if(founderr) { /*DIAGNOSTIC ajFmtPrintF(logf, "About to try (seqres_idx_last: %d, " "seqres_idx: %d) ...\n", seqres_idx_last, seqres_idx); */ aa_last = ajStrGetCharFirst(substr); for(z=seqres_idx_last+1; zseqres[i]->Ptr[z]);*/ if(pdbfile->seqres[i]->Ptr[z] == aa_last) { nmismatches--; founderr=ajFalse; fixed = ajTrue; /* Assign residue number */ ajIntPut(&idx[i], atm_idx, z+1); for(k=0, y=atm_idx+1, z=seqres_idx+1; kseqres[i]->Ptr[idx_misfit_seqres], &aa_misfit); /* To give correct index into SEQRES records in original PDB file, subtract j to account for modifications to the N-terminus that were made for missing residues relative to ATOM sequence. A further 1 is added to give a number starting from 1 (rather than 0)*/ /*DIAGNOSTIC ajFmtPrintF(logf, "a : %d\npdbfile->rtype[a] : " "%S\npdbfile->pdbn[a] : %S\n", a, pdbfile->rtype[a], pdbfile->pdbn[a]); */ ajFmtPrintS(&msgbit, "%S %S %S %d; ", pdbfile->rtype[a], pdbfile->pdbn[a], aa_misfit, idx_misfit_seqres-j+1); /*DIAGNOSTIC ajFmtPrintS(&msgbit, "ATOM residue %d (%c) vs " "SEQRES residue %d (%c). ", ajIntGet(num[i], atm_idx), ajStrGetCharFirst(substr), seqres_idx+1, pdbfile->seqres[i]->Ptr[seqres_idx]); */ ajStrAppendS(&msgstr, msgbit); pdbfile->seqres[i]->Ptr[seqres_idx]=ajStrGetCharFirst(substr); } /* atm_ptr and seqres_ptr now point to 1 residue past the end of the match of the substring in the ATOM and SEQRES sequences respectively. */ atm_ptr+=siz_substr; if(founderr) seqres_ptr=(loc_ptr-1)+siz_substr; else seqres_ptr=loc_ptr+siz_substr; siz_substr=nres[i]-(atm_idx+siz_substr); /**************************************************/ /* Must assign index into SEQRES for */ /* C-terminal residue of substring */ /**************************************************/ /* seqres_idx_last is set to give the index into SEQRES sequence for the position of match of C-terminal residue of substring + 1 */ seqres_idx_last = (int) z-1; } /*Check to ensure that position for last residue has been worked out */ if((done_end) && (nmismatches<=lim)) { if(nmismatches) ajFmtPrintF(logf, "%-15s%d (%c) %d %S\n", "GAPPED", i+1, ajChararrGet(pdbfile->chid, i), nmismatches, msgstr); else ajFmtPrintF(logf, "%-15s%d (%c)\n", "GAPPEDOK", i+1, ajChararrGet(pdbfile->chid, i)); /* Residue numbering is correct after alignment (acceptable number of mismatches) */ if(x==0) pdbfile->resn1ok[i]=ajTrue; else pdbfile->resn1ok[i]=ajFalse; /*DIAGNOSTIC ajFmtPrintF(logf, "STEP6 OK %d mismatches\n", nmismatches); */ done=ajTrue; break; } /*DIAGNOSTIC ajFmtPrintF(logf, "STEP6 **NOT** OK %d mismatches\n", nmismatches); */ /* Otherwise, agreement could not be found - unacceptable number of mismatches. Restore the original seqres sequence*/ ajStrAssignS(&(pdbfile->seqres[i]), tmpseqres); pdbfile->nres[i]=ajStrGetLen(pdbfile->seqres[i]); } /* for(x=0;x<2;x++) */ if(done) break; } /* After trying 6 different alignment steps with (potentially modified) seqres sequences and 2 (possibly different) sequences derived from the ATOM records an alignment with agreement in residue numbering still cannot be found. Use data from ATOM records only - use seq1 (all residues) and presume there are no missing residues.*/ if(!done) { ajFmtPrintF(logf, "%-15s%d (%c)\n", "NOMATCH", i+1, ajChararrGet(pdbfile->chid, i)); ajStrAssignS(&(pdbfile->seqres[i]), seq1[i]); pdbfile->nres[i]=ajStrGetLen(pdbfile->seqres[i]); for(k=0;kresn1ok[i]=ajTrue; } else { if(j) ajFmtPrintF(logf, "%-15s%d (%c) %d\n", "MISSNTERM", i+1, ajChararrGet(pdbfile->chid, i), j); if(!ajStrMatchS(seq1[i], seq2[i])) { if(x==0) ajFmtPrintF(logf, "%-15s%d\n", "ALTERNOK", i+1); else ajFmtPrintF(logf, "%-15s%d\n", "HETEROK", i+1); } } } /* Write the index arrays */ for(i=0;inchains; i++) { if(!pdbfile->chainok[i]) continue; if(pdbfile->resn1ok[i]) { for(j=0;jidxfirst;inlines;i++) { if(pdbfile->linetype[i]==PDBPARSE_COORD) { if(pdbfile->resn1ok[pdbfile->chnn[i]-1]) { pdbfile->resni[i]= ajIntGet(idx_full[pdbfile->chnn[i]-1], pdbfile->resn1[i]); /*DIAGNOSTIC ajFmtPrintF(logf, "Got position %d (%d) ok\n", pdbfile->resn1[i], ajIntGet(idx_full[pdbfile->chnn[i]-1], pdbfile->resn1[i])); */ } else { pdbfile->resni[i]= ajIntGet(idx_full[pdbfile->chnn[i]-1], pdbfile->resn2[i]); /*DIAGNOSTIC ajFmtPrintF(logf, "Got position %d (%d) ok\n", pdbfile->resn2[i], ajIntGet(idx_full[pdbfile->chnn[i]-1], pdbfile->resn2[i])); */ } } } /* DIAGNOSTIC ajFmtPrintF(logf, "\n\n\n"); seq=seq1; for(i=0;inchains; i++) { if(!pdbfile->chainok[i]) { ajFmtPrintF(logf, "Chain %d\nSEQRES %S\nCHAIN NOT OK\n\n\n", i+1, pdbfile->seqres[i]); continue; } ajFmtPrintF(logf, "Chain %d\nSEQRES %S\nSEQ__1 %S\nSEQ__2 %S\n\n\n", i+1, pdbfile->seqres[i], seq1[i], seq2[i]); } for(i=0;inchains; i++) { if(!pdbfile->chainok[i]) continue; if(nres1[i] > nres2[i]) max = nres1[i]; else max = nres2[i]; ajFmtPrintF(logf, "CHAIN %d\n", i+1); ajFmtPrintF(logf, "seqres %S\n", pdbfile->seqres[i]); ajFmtPrintF(logf, "%-6s%-6s%-6s%-6s%-6s%-6s%-6s\n", "RES", "NUM", "SEQ1", "RESN1", "SEQ2", "RESN2", "IDX"); for(j=0;jresn1ok[i]) { ajFmtPrintF(logf, "%-6d\n", ajIntGet(idx[i], j)); } else { ajFmtPrintF(logf, "%-6c\n", '.'); } } else { ajFmtPrintF(logf, "%-6s%-6d%-6c%-6c%-6c%-6d", "RES", j+1, '.', '.', ajStrGetCharPos(seq2[i], j), ajIntGet(num2[i], j)); if(!pdbfile->resn1ok[i]) { ajFmtPrintF(logf, "%-6d\n", ajIntGet(idx[i], j)); } else { ajFmtPrintF(logf, "%-6c\n", '.'); } } } } */ /* Tidy up and return */ ajStrDel(&aa_misfit); ajStrDel(&seqbit); ajStrDel(&msgstr); ajStrDel(&msgbit); AJFREE(insert); ajStrDel(&tmpseqres); ajStrDel(&bit); ajStrDel(&substr); ajStrDel(&substr2); for(i=0;inchains; i++) { if(!pdbfile->chainok[i]) continue; ajStrDel(&seq1[i]); ajStrDel(&seq2[i]); ajIntDel(&num1[i]); ajIntDel(&num2[i]); ajIntDel(&idx[i]); ajIntDel(&idx_full[i]); } AJFREE(seq1); AJFREE(seq2); AJFREE(num1); AJFREE(num2); AJFREE(idx); AJFREE(idx_full); AJFREE(nres1); AJFREE(nres2); return ajTrue; } /* #funcstatic diagnostic ************************************************** ** ** For printing out diagnostics for pdbparse build ** ** ** #param [r] pdbfile [AjPPdbfile] Pdbfile object ** #param [r] n [ajint ] Flag for controlling output ** ** #return [AjBool] True on success, False otherwise ** ## ****************************************************************************/ /*THIS_DIAGNOSTIC //static void diagnostic(AjPPdbfile pdbfile, ajint n) //{ // ajint i; // i=0; // // // // // // if(n==0) // { // ajFmtPrintF(tempfile, "nchains: %d\n", pdbfile->nchains); // for(i=0;inchains; i++) // ajFmtPrintF(tempfile, "chid: %c\n", // ajChararrGet(pdbfile->chid, i)); // for(i=0;inchains; i++) // { // ajFmtPrintF(tempfile, "seqres %d: %S\n", i, pdbfile->seqres[i]); // } // // // ajFmtPrintF(tempfile, "tercnt: %d\n", pdbfile->tercnt); // ajFmtPrintF(tempfile, "COMPND: %S\n", pdbfile->compnd); // ajFmtPrintF(tempfile, "SOURCE: %S\n", pdbfile->source); // ajFmtPrintF(tempfile, "reso: %f\n", pdbfile->reso); // if(pdbfile->method == ajXRAY) // ajFmtPrintF(tempfile, "method: ajXRAY\n"); // else // ajFmtPrintF(tempfile, "method: ajNMR\n"); // } // else if(n==1) // { // for(i=0;inlines;i++) // { // ajFmtPrintF(tempfile, "%-5S", pdbfile->pdbid); // // if(pdbfile->linetype[i]==PDBPARSE_IGNORE) // ajFmtPrintF(tempfile, "%-10s", "IGNORE"); // else if(pdbfile->linetype[i]==PDBPARSE_COORD) // ajFmtPrintF(tempfile, "%-10s", "COORD"); // else if(pdbfile->linetype[i]==PDBPARSE_COORDHET) // ajFmtPrintF(tempfile, "%-10s", "COORDHET"); // else if(pdbfile->linetype[i]==PDBPARSE_COORDGP) // ajFmtPrintF(tempfile, "%-10s", "COORDGP"); // else if(pdbfile->linetype[i]==PDBPARSE_COORDWAT) // ajFmtPrintF(tempfile, "%-10s", "COORDWAT"); // else if(pdbfile->linetype[i]==PDBPARSE_TER) // ajFmtPrintF(tempfile, "%-10s", "TER"); // else if(pdbfile->linetype[i]==PDBPARSE_MODEL) // ajFmtPrintF(tempfile, "%-10s", "MODEL"); // // // // ajFmtPrintF(tempfile, // "M%-2dC%-2d%-6S%-5d%-5d%-4B%-4S%-4S%-7.3f%-7.3f" // "%-7.3f%-6.3f%-6.3f\n", // pdbfile->modn[i], // pdbfile->chnn[i], // pdbfile->pdbn[i], // pdbfile->resn1[i], // pdbfile->resn2[i], // pdbfile->oddnum[i], // pdbfile->atype[i], // pdbfile->rtype[i], // pdbfile->x[i], // pdbfile->y[i], // pdbfile->z[i], // pdbfile->o[i], // pdbfile->b[i]); // // } // } // // //} */ /* @funcstatic PdbfileToPdb ************************************************ ** ** Reads data from a Pdbfile object and writes a Pdb object. ** Chains that did not contain at least the user-defined threshold number of ** amino acid residues are discarded, i.e. are NOT copied and will NOT appear ** in the output file that is eventually generated. ** ** ** @param [w] ret [AjPPdb *] Pdb object pointer ** @param [u] pdb [AjPPdbfile] Pdbfile object ** ** @return [AjBool] True on success, False otherwise ** @@ ****************************************************************************/ static AjBool PdbfileToPdb(AjPPdb *ret, AjPPdbfile pdb) { ajint i =0; /* Loop counter */ ajint idx =0; /* Index into chain array */ ajint j =0; /* Loop counter */ AjPAtom atm =NULL; /* Atom object */ AjPResidue res =NULL; /* Residue object */ ajint nchn =0; /* No. chains that have min. no. of aa's */ AjPInt lookup; /* Array of chain numbers for chains in ret for all chains in pdb.A '0' is given for chains with < threshold no. of aa's */ ajint chn=0; ajint rn_last = -100000; ajint mn_last = -100000; ajint eNum; AjPStr eId=NULL; char eType; ajint eClass; if( !ret || !pdb) { ajWarn("Bad args passed to PdbfileToPdb"); return ajFalse; } if((*ret)) { ajWarn("Bad args passed to PdbfileToPdb - PDB object exists"); return ajFalse; } eId = ajStrNew(); lookup = ajIntNewL(pdb->nchains); ajIntPut(&lookup, pdb->nchains-1, 0); for(nchn=0, i=0;inchains;i++) if((pdb)->chainok[i]) { nchn++; ajIntPut(&lookup, i, nchn); } *ret=ajPdbNew(nchn); (*ret)->Nchn = nchn; ajStrAssignS(&((*ret)->Pdb), pdb->pdbid); ajStrAssignS(&((*ret)->Compnd), pdb->compnd); ajStrAssignS(&((*ret)->Source), pdb->source); (*ret)->Method = pdb->method; (*ret)->Reso = pdb->reso; (*ret)->Nmod = pdb->modcnt; (*ret)->Ngp = pdb->ngroups; /* (*ret)->Nchn = pdb->nchains; */ for(i=0; ingroups; i++) ajChararrPut(&((*ret)->gpid), i, ajChararrGet(pdb->gpid, i)); for(idx=-1, i=0;inchains;i++) { if((pdb)->chainok[i]) idx++; else continue; (*ret)->Chains[idx]->Id = ajChararrGet((pdb)->chid, i); /* These counts are no longer made from the PDB records. They are only made if the file is annotated with stride secondary structure info by using pdbstride */ /* (*ret)->Chains[idx]->numHelices = (pdb)->numHelices[i]; (*ret)->Chains[idx]->numStrands = (pdb)->numStrands[i]; (*ret)->Chains[idx]->numSheets = (pdb)->numSheets[i]; (*ret)->Chains[idx]->numTurns = (pdb)->numTurns[i]; */ (*ret)->Chains[idx]->Nres = (pdb)->nres[i]; (*ret)->Chains[idx]->Nlig = (pdb)->nligands[i]; ajStrAssignS(&((*ret)->Chains[idx]->Seq), (pdb)->seqres[i]); } for(j=(pdb)->idxfirst; j<(pdb)->nlines; j++) { if(((pdb)->linetype[j]==PDBPARSE_COORD) || ((pdb)->linetype[j]==PDBPARSE_COORDHET) || ((pdb)->linetype[j]==PDBPARSE_COORDGP) || ((pdb)->linetype[j]==PDBPARSE_COORDWAT)) { /* Skip this line if it for a heterogenous (duplicate) position */ if( (!(pdb)->resn1ok[(pdb)->chnn[j]-1]) && (pdb)->oddnum[j]) continue; atm = ajAtomNew(); atm->Mod = (pdb)->modn[j]; /* atm->Chn = (pdb)->chnn[j]; */ atm->Chn = ajIntGet(lookup, (pdb)->chnn[j]-1); atm->Gpn = (pdb)->gpn[j]; if((pdb)->linetype[j]==PDBPARSE_COORDHET) atm->Type = 'H'; else if((pdb)->linetype[j]==PDBPARSE_COORDGP) atm->Type = 'H'; else if((pdb)->linetype[j]==PDBPARSE_COORD) atm->Type = 'P'; else if((pdb)->linetype[j]==PDBPARSE_COORDWAT) atm->Type = 'W'; atm->Idx = (pdb)->resni[j]; ajStrAssignS(&atm->Pdb, (pdb)->pdbn[j]); if(((pdb)->linetype[j]==PDBPARSE_COORDHET) || ((pdb)->linetype[j]==PDBPARSE_COORDGP) || ((pdb)->linetype[j]==PDBPARSE_COORDWAT)) atm->Id1='.'; else ajResidueFromTriplet((pdb)->rtype[j],&atm->Id1); ajStrAssignS(&atm->Id3, (pdb)->rtype[j]); ajStrAssignS(&atm->Atm, (pdb)->atype[j]); atm->X=(pdb)->x[j]; atm->Y=(pdb)->y[j]; atm->Z=(pdb)->z[j]; atm->O=(pdb)->o[j]; atm->B=(pdb)->b[j]; ajStrAssignS(&eId, (pdb)->elementId[j]); eNum = (pdb)->elementNum[j]; eType = (pdb)->elementType[j]; eClass = (pdb)->helixClass[j]; if((pdb)->linetype[j]==PDBPARSE_COORDGP) ajListPushAppend((*ret)->Groups, atm); else if((pdb)->linetype[j]==PDBPARSE_COORDWAT) ajListPushAppend((*ret)->Water, atm); else { if((pdb)->chainok[(pdb)->chnn[j]-1]) { /* ajListPushAppend((*ret)->Chains[(pdb)->chnn[j]-1]->Atoms, atm); */ /* ajListPushAppend((*ret)->Chains[ajIntGet(lookup, (pdb)->chnn[j]-1)-1]->Atoms, atm); */ chn = ajIntGet(lookup, (pdb)->chnn[j]-1)-1; ajListPushAppend((*ret)->Chains[chn]->Atoms, atm); /* Write residue object */ if(atm->Type=='P') { /* New model */ if(atm->Mod != mn_last) { rn_last = -100000; mn_last = atm->Mod; } /* New residue */ if(atm->Idx != rn_last) { res = ajResidueNew(); res->Mod = atm->Mod; res->Chn = atm->Chn; res->Idx = atm->Idx; ajStrAssignS(&res->Pdb, atm->Pdb); res->Id1 = atm->Id1; ajStrAssignS(&res->Id3, atm->Id3); res->eNum = eNum; ajStrAssignS(&res->eId, eId); res->eType = eType; res->eClass = eClass; ajListPushAppend((*ret)->Chains[chn]->Residues, (void *) res); rn_last = atm->Idx; } } } else ajAtomDel(&atm); } } else continue; } ajIntDel(&lookup); ajStrDel(&eId); return ajTrue; } /* @funcstatic PdbfileFindLine ************************************************ ** ** Returns the line number of the first instance of a line with a specified ** residue and chain number. ** ** @param [r] pdb [const AjPPdbfile] Pdbfile object pointer ** @param [r] chn [ajint] Chain number ** @param [r] which [ajint] 0 or 1, refer to resn1 or resn2 residue ** @param [r] pos [ajint] Residue number ** ** @return [ajint] Line number (index, i.e. starts from 0). ** @@ ****************************************************************************/ static ajint PdbfileFindLine(const AjPPdbfile pdb, ajint chn, ajint which, ajint pos) { ajint a=0; /* a will give the number of the first coordinate line for the mismatch residue from the ATOM records*/ for(a=pdb->idxfirst;anlines;a++) if(pdb->linetype[a]==PDBPARSE_COORD && pdb->chnn[a] == chn) /* First sequence (all residues) derived for atom records */ /* OR Second sequence (excluding certain residues) derived for atom records */ if(((which==0) && (pos == pdb->resn1[a])) || ((which==1) && (pos == pdb->resn2[a]))) break; if(a==pdb->nlines) ajFatal("Unexpected loop failure in PdbfileFindLine. " "Email jison@hgmp.mrc.ac.uk\n"); return a; } /* @funcstatic PdbfileChain ************************************************ ** ** Finds the chain number for a given chain identifier in a pdbfile structure ** ** @param [r] id [char] Chain identifier ** @param [r] pdb [const AjPPdbfile] Pdbfile object ** @param [w] chn [ajint *] Chain number ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ static AjBool PdbfileChain(char id, const AjPPdbfile pdb, ajint *chn) { ajint a; for(a=0;anchains;a++) { if(toupper((int) ajChararrGet(pdb->chid, a)) == toupper((int) id)) { *chn=a+1; return ajTrue; } /* Cope with chain id's of ' ' (which might be given as '.' in the Pdbfile object) */ if((id==' ')&&(ajChararrGet(pdb->chid, a)=='.')) { *chn=a+1; return ajTrue; } } /* A '.' may be given as the id for domains comprising more than one chain*/ if(id=='.') { *chn=1; return ajTrue; } return ajFalse; } /* @funcstatic WriteElementData ******************************************** ** ** Reads the secondary structure information from an Elements object ** and writes equivalent variables in an Pdbfile object. ** ** @param [w] pdbfile [AjPPdbfile] Pdbfile object ** @param [u] logf [AjPFile] Pointer to log file (build diagnostics) ** @param [r] elms [const AjPElements] Elements object pointer ** ** @return [AjBool] True on success, False otherwise ** @@ ****************************************************************************/ static AjBool WriteElementData(AjPPdbfile pdbfile, AjPFile logf, const AjPElements elms) { ajint x =0; ajint y =0; ajint z =0; ajint modn =0; /* Model number */ ajint idx =0; /* idx into lines in Pdbfile object */ ajint idx_start =0; /* Line index of start of element */ ajint idx_end =0; /* Line index of end of element */ ajint idx_last =0; /* Line index of last line to try */ ajint idx_tmp =0; /* Temp. line index */ ajint chn =0; /* Chain id of current element as integer */ AjPInt nsheets =NULL; /* Number of sheets in each chain */ AjPStr *lastids =NULL; /* Last sheet identifier read in for each chain */ AjBool found_start=ajFalse; /* Whether start residue of the current element has been found yet */ AjBool found_end =ajFalse; /* Whether the end residue of the current element has been found yet */ ajint n_unknown =0; /* No. of unknown chain ids */ AjPChar unknowns =NULL; /* Unknown chain ids */ AjBool found =ajFalse; /* True if we have already reported an error message for the unknown chain id */ /* Check args */ if(!pdbfile || !(elms) || !(logf)) { ajWarn("NULL arg passed to WriteElementData"); return ajFalse; } /* Assign default values for secondary structure fields in Pdbfile object */ for(x=0; xnlines; x++) { pdbfile->elementType[x]='.'; ajStrAssignC(&pdbfile->elementId[x], "."); } /* Allocate memory */ nsheets = ajIntNewL(pdbfile->nchains); ajIntPut(&nsheets, pdbfile->nchains, 0); unknowns = ajChararrNew(); AJCNEW0(lastids, pdbfile->nchains); for(x=0;xnchains;x++) { lastids[x]=ajStrNew(); /* Assign a silly value for starters */ ajStrAssignC(&lastids[x], "?????"); } /* ajFmtPrint("LOOK HERE x:%d elms->n:%d\n", x, elms->n); */ for(modn=1; modn<=pdbfile->modcnt; modn++) { /* Loop for each element. Set the current line to the first line in the file*/ for(idx=pdbfile->idxfirst, x=0;xn; x++) { /* Find the chain number of the current element */ if(!PdbfileChain(elms->elms[x]->chainId, pdbfile, &chn)) { /* Only report errors once for each unknown id */ if(modn==1) { for(found=ajFalse, y=0; yelms[x]->chainId) { found=ajTrue; break; } if(!found) { ajFmtPrintF(logf, "%-15s%c %d\n", "SECCHAIN", elms->elms[x]->chainId, idx); ajChararrPut(&unknowns, n_unknown, elms->elms[x]->chainId); n_unknown++; } } continue; } /* Only want to do this once (for the first model ) */ if(modn==1) { /* Make a count of the number of beta sheets */ if(elms->elms[x]->elementType == 'E') if(!ajStrMatchS(lastids[chn-1], elms->elms[x]->elementId)) { ajIntInc(&nsheets, chn-1); ajStrAssignS(&lastids[chn-1], elms->elms[x]->elementId); } } /* Loop for two passes. z is for efficiency, if z==0 it will check from the current position up to the last coordinate line, if z==1 it will check from the first coordinate line up to the last position checked*/ for(found_start=ajFalse, found_end=ajFalse, z=0; z<2; z++) { if(z==0) idx_last = pdbfile->nlines; else { idx=pdbfile->idxfirst; idx_last = idx_tmp; } /* Find the start and end of the current element (as an index into the line array) */ for(;idxchnn[idx] != chn) || (pdbfile->linetype[idx]!=PDBPARSE_COORD) || (pdbfile->modn[idx] != modn)) continue; /* We have not found the start residue yet */ if(!found_start) if(ajStrMatchS(elms->elms[x]->initSeqNum, pdbfile->pdbn[idx])) if(ajStrMatchS(elms->elms[x]->initResName, pdbfile->rtype[idx])) { /* Residue number for start found and residue type matches */ idx_start = idx; /* printf("found_start !\n"); */ found_start=ajTrue; } if(ajStrMatchS(elms->elms[x]->endSeqNum, pdbfile->pdbn[idx])) if(ajStrMatchS(elms->elms[x]->endResName, pdbfile->rtype[idx])) { /* Residue number for end found and residue type matches */ idx_end = idx; /* printf("idx_end: %d ...", idx_end); */ /* Set the index to the LAST atom of the residue */ for(; idx_end < pdbfile->nlines; idx_end++) { if(pdbfile->linetype[idx_end]!= PDBPARSE_COORD) continue; if(!ajStrMatchS(elms->elms[x]->endSeqNum, pdbfile->pdbn[idx_end]) || !ajStrMatchS(elms->elms[x]->endResName, pdbfile->rtype[idx_end])|| pdbfile->chnn[idx_end] != chn || pdbfile->modn[idx_end] != modn) break; } idx_end--; /* printf(" %d\n", idx_end); ajFmtPrint("found_end !\n" "elms->elms[x]->endSeqNum " ": pdbfile->pdbn[idx_end] === %S : %S\n" "elms->elms[x]->endResName " ": pdbfile->rtype[idx_end] === %S : %S\n" "pdbfile->chnn[idx_end] " ": chn === %d : %d\n" "pdbfile->modn[idx_end] " ": modn === %d : %d\n", elms->elms[x]->endSeqNum, pdbfile->pdbn[idx_end+1], elms->elms[x]->endResName, pdbfile->rtype[idx_end+1], pdbfile->chnn[idx_end+1], chn, pdbfile->modn[idx_end+1], modn); */ found_end=ajTrue; idx_tmp = idx; break; } } if(found_start && found_end) break; } if(!found_start || !found_end) { if(!found_start && !found_end) { ajFmtPrintF(logf, "%-15s%d %d %S %S %S %S\n", "SECBOTH", chn, modn, elms->elms[x]->initResName, elms->elms[x]->initSeqNum, elms->elms[x]->endResName, elms->elms[x]->endSeqNum); } else if(!found_start) { ajFmtPrintF(logf, "%-15s%d %d %S %S\n", "SECSTART", chn, modn, elms->elms[x]->initResName, elms->elms[x]->initSeqNum); } else if(!found_end) { ajFmtPrintF(logf, "%-15s%d %d %S %S\n", "SECEND", chn, modn, elms->elms[x]->endResName, elms->elms[x]->endSeqNum); } } /* Assign secondary structure fields in Pdbfile object */ for(idx=idx_start; idx<=idx_end; idx++) { pdbfile->elementNum[idx] = elms->elms[x]->elementNum; pdbfile->elementType[idx] = elms->elms[x]->elementType; if(elms->elms[x]->elementType == 'H') pdbfile->helixClass[idx] = elms->elms[x]->helixClass; ajStrAssignS(&pdbfile->elementId[idx], elms->elms[x]->elementId); } /* Only want to do this once */ if(modn==1) { if(elms->elms[x]->elementType == 'H') pdbfile->numHelices[chn-1]++; else if(elms->elms[x]->elementType == 'E') pdbfile->numStrands[chn-1]++; else if(elms->elms[x]->elementType == 'T') pdbfile->numTurns[chn-1]++; } } } /* Assign number of sheets */ if(modn==1) for(x=0; xnchains; x++) pdbfile->numSheets[x] = ajIntGet(nsheets, x); /* Tidy up and return */ ajIntDel(&nsheets); for(x=0;xnchains;x++) ajStrDel(&lastids[x]); AJFREE(lastids); ajChararrDel(&unknowns); return ajTrue; } /* ======================================================================= */ /* =========================== constructors ============================== */ /* ======================================================================= */ /* @section Constructors **************************************************** ** ** All constructors return a pointer to a new instance. It is the ** responsibility of the user to first destroy any previous instance. The ** target pointer does not need to be initialised to NULL, but it is good ** programming practice to do so anyway. ** ****************************************************************************/ /* @func ajPdbReadRawNew ************************************************** ** ** Reads a pdb file and returns a pointer to a filled Pdb object. ** ** The pdb id is derived from the file name and extension of the pdb file ** (these are passed in by argument). ** ** @param [u] inf [AjPFile] Pointer to pdb file ** @param [r] pdbid [const AjPStr] PDB id code of pdb file ** @param [r] min_chain_size [ajint] Minimum number of amino acids in a chain ** @param [r] max_mismatch [ajint] Maximum number of permissible mismatches ** between the ATOM and SEQRES sequences ** @param [r] max_trim [ajint] Max. no. residues to trim when checking ** for missing N- or C-terminal ATOM or ** SEQRES sequences. ** @param [r] camask [AjBool] Whether to mask non-amino acid groups ** within protein chains which do not have a C-alpha atom. ** @param [r] camask1 [AjBool] Whether to mask amino acid residues ** within protein chains which do not have a C-alpha atom. ** @param [r] atommask [AjBool] Whether to mask residues or groups ** in protein chains with a single atom only. ** @param [u] logf [AjPFile] Pointer to log file (build diagnostics) ** ** @return [AjPPdb] pdb object pointer, or NULL on failure. ** @@ ****************************************************************************/ AjPPdb ajPdbReadRawNew(AjPFile inf, const AjPStr pdbid, ajint min_chain_size, ajint max_mismatch, ajint max_trim, AjBool camask, AjBool camask1, AjBool atommask, AjPFile logf) { AjPPdbfile pdbfile =NULL; /* Pdbfile structure (for raw data) */ AjPPdb ret =NULL; /* Pdb structure (for parsed data) */ AjPElements elms=NULL; /* Elements structure (for parsed data)*/ if(!inf || !logf) { ajWarn("Null arg passed to ajPdbReadRawNew"); return NULL; } /* Write pdbfile structure */ if(!(pdbfile=ReadLines(inf))) { return NULL; } /* Allocate Elements object */ elms = ElementsNew(0); ajStrAssignS(&(pdbfile->pdbid), pdbid); ajStrFmtLower(&(pdbfile->pdbid)); /* Initial read of pdb file, read sequences for chains from SEQRES records, mark lines up to ignore or as coordinate lines, assigning initial residue numbers, read bibliographic information etc. */ if(!FirstPass(pdbfile, logf, &elms, camask)) { PdbfileDel(&pdbfile); ElementsDel(&elms); return NULL; } /*DIAGNOSTIC diagnostic(&pdbfile, 0); diagnostic(&pdbfile, 1);*/ /* Check that SEQRES records contain protein chains. Check that chain id's are unique */ if(!CheckChains(pdbfile, logf, min_chain_size)) { PdbfileDel(&pdbfile); ElementsDel(&elms); return NULL; } /* Check for correct number of TER records. Mask unwanted TER records */ if(!CheckTer(pdbfile, logf)) { PdbfileDel(&pdbfile); ElementsDel(&elms); return NULL; } /* Assign model and chain number to each coordinate line. Mark up non-protein coordinates */ if(!NumberChains(pdbfile, logf)) { PdbfileDel(&pdbfile); ElementsDel(&elms); return NULL; } /* Mask any ATOM or HETATM records with chain id's of chains of non-proteins or chains that have non-unique id's (chainok==ajFalse). Check that ATOM records contain protein chains. */ if(!MaskChains(pdbfile, logf, min_chain_size, camask, camask1, atommask)) { ElementsDel(&elms); PdbfileDel(&pdbfile); return NULL; } /* DIAGNOSTIC diagnostic(pdbfile, 0); diagnostic(pdbfile, 1); */ /* Standardise residue numbering */ if(!StandardiseNumbering(pdbfile, logf)) { PdbfileDel(&pdbfile); ElementsDel(&elms); return NULL; } /* Find correct residue numbering */ if(!AlignNumbering(pdbfile, logf, max_mismatch, max_trim)) { PdbfileDel(&pdbfile); ElementsDel(&elms); return NULL; } if(!WriteElementData(pdbfile, logf, elms)) { PdbfileDel(&pdbfile); ElementsDel(&elms); return NULL; } ElementsDel(&elms); /* Copy data from Pdbfile object to Pdb object. PdbfileToPdb creates the Pdb object (ret) */ if(!PdbfileToPdb(&ret, pdbfile)) { PdbfileDel(&pdbfile); ajPdbDel(&ret); return NULL; } /* Tidy up and return */ PdbfileDel(&pdbfile); return ret; } /* ======================================================================= */ /* =========================== destructors =============================== */ /* ======================================================================= */ /* @section Structure Destructors ******************************************* ** ** All destructor functions receive the address of the instance to be ** deleted. The original pointer is set to NULL so is ready for re-use. ** ****************************************************************************/ /* ======================================================================= */ /* ============================ Assignments ============================== */ /* ======================================================================= */ /* @section Assignments ***************************************************** ** ** These functions overwrite the instance provided as the first argument ** A NULL value is always acceptable so these functions are often used to ** create a new instance by assignment. ** ****************************************************************************/ /* ======================================================================= */ /* ============================= Modifiers =============================== */ /* ======================================================================= */ /* @section Modifiers ******************************************************* ** ** These functions use the contents of an instance and update them. ** ****************************************************************************/ /* ======================================================================= */ /* ========================== Operators ===================================*/ /* ======================================================================= */ /* @section Operators ******************************************************* ** ** These functions use the contents of an instance but do not make any ** changes. ** ****************************************************************************/ /* ======================================================================= */ /* ============================== Casts ===================================*/ /* ======================================================================= */ /* @section Casts *********************************************************** ** ** These functions examine the contents of an instance and return some ** derived information. Some of them provide access to the internal ** components of an instance. They are provided for programming convenience ** but should be used with caution. ** ****************************************************************************/ /* ======================================================================= */ /* =========================== Reporters ==================================*/ /* ======================================================================= */ /* @section Reporters ******************************************************* ** ** These functions return the contents of an instance but do not make any ** changes. ** ****************************************************************************/ /* ======================================================================= */ /* ========================== Input & Output ============================= */ /* ======================================================================= */ /* @section Input & output ************************************************** ** ** These functions are used for formatted input and output to file. ** ****************************************************************************/ /* @func ajPdbWriteDomainRecordRaw ***************************************** ** ** Writes lines to a PDB file. What is written depends upon the mode: ** ajHEADER_DOMAIN Header line for domain PDB file. ** ajSEQRES_DOMAIN SEQRES records for domain. ** ajATOMPDB_DOMAIN ATOM records for domain using original residue numbers. ** ajATOMIDX_DOMAIN ATOM records for domain using residues numbers that give ** correct index into SEQRES sequence. ** ** @param [r] mode [ajint] Mode that controls what is printed: one of ** ajHEADER_DOMAIN, ajSEQRES_DOMAIN, ** ajATOMPDB_DOMAIN, ajATOMIDX_DOMAIN ** ** @param [r] pdb [const AjPPdb] Pdb object ** @param [r] mod [ajint] Model number ** @param [r] scop [const AjPScop] Scop object for domain ** @param [w] outf [AjPFile] Output file stream ** @param [w] errf [AjPFile] Output file stream for error messages ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ AjBool ajPdbWriteDomainRecordRaw(ajint mode, const AjPPdb pdb, ajint mod, const AjPScop scop, AjPFile outf, AjPFile errf) { /* Check args */ if(!outf || !scop) ajFatal("Invalid args passed to ajPdbWriteDomainRecordRaw"); if(mode==ajHEADER_DOMAIN) { if(!WriteHeaderScop(outf, scop)) return ajFalse; } else if(mode==ajSEQRES_DOMAIN) { if(!errf || !pdb) ajFatal("Invalid args passed to ajPdbWriteDomainRecordRaw"); if(!WriteSeqresDomain(errf, outf, pdb, scop)) return ajFalse; } else if(mode==ajATOMPDB_DOMAIN) { if(!errf || !pdb) ajFatal("Invalid args passed to ajPdbWriteDomainRecordRaw"); if(!WriteAtomDomainPdb(errf, outf, pdb, scop, mod)) return ajFalse; } else if(mode==ajATOMIDX_DOMAIN) { if(!errf || !pdb) ajFatal("Invalid args passed to ajPdbWriteDomainRecordRaw"); if(!WriteAtomDomainIdx(errf, outf, pdb, scop, mod)) return ajFalse; } else ajFatal("Invalid mode in ajPdbWriteDomainRecordRaw"); return ajTrue; } /* @func ajPdbWriteRecordRaw ************************************************ ** ** Writes lines in pdb format to a PDB file. What is written depends upon ** the mode: ** ajSEQRES_CHAIN SEQRES records for a chain. ** ajATOMPDB_CHAIN ATOM records for chain using original residue numbers. ** ajATOMIDX_CHAIN ATOM records for domain using residues numbers that ** give correct index into SEQRES sequence. ** ajHETEROGEN ATOM line for a heterogen (small ligand). ** ajHEADER Header line. ** ajTITLE Title line. ** ajCOMPND COMPND records (info. on compound) ** ajSOURCE SOURCE records (info. on protein source) ** ajEMPTYREMARK An empty REMARK record. ** ajRESOLUTION Record with resolution of the structure. ** ** @param [r] mode [ajint] Mode that controls what is printed: one of ** ajSEQRES_CHAIN, ajATOMPDB_CHAIN, ajATOMIDX_CHAIN, ** ajHETEROGEN, ajHEADER, ajTITLE,ajCOMPND,ajSOURCE, ** ajEMPTYREMARK,ajRESOLUTION. ** @param [r] pdb [const AjPPdb] Pdb object ** @param [r] mod [ajint] Model number. ** @param [r] chn [ajint] Chain number. ** @param [w] outf [AjPFile] Output file stream ** @param [w] errf [AjPFile] Output file stream for error messages ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ AjBool ajPdbWriteRecordRaw(ajint mode, const AjPPdb pdb, ajint mod, ajint chn, AjPFile outf, AjPFile errf) { /* Check args */ if(!outf || !pdb) ajFatal("Invalid args passed to ajPdbWriteRecordRaw"); if(mode==ajSEQRES_CHAIN) { if(!errf) ajFatal("Invalid args passed to ajPdbWriteRecordRaw"); if(!WriteSeqresChain(errf, outf, pdb, chn)) return ajFalse; } else if(mode==ajATOMPDB_CHAIN) { if(!WriteAtomChain(outf, pdb, mod, chn, ajPDB)) return ajFalse; } else if(mode==ajATOMIDX_CHAIN) { if(!WriteAtomChain(outf, pdb, mod, chn, ajIDX)) return ajFalse; } else if(mode==ajHETEROGEN) { if(!WriteHeterogen(outf, pdb, mod)) return ajFalse; } else if(mode==ajHEADER) { if(!WriteHeader(outf, pdb)) return ajFalse; } else if(mode==ajTITLE) { if(!WriteTitle(outf, pdb)) return ajFalse; } else if(mode==ajCOMPND) { if(!WriteCompnd(outf, pdb)) return ajFalse; } else if(mode==ajSOURCE) { if(!WriteSource(outf, pdb)) return ajFalse; } else if(mode==ajEMPTYREMARK) { if(!WriteEmptyRemark(outf, pdb)) return ajFalse; } else if(mode==ajRESOLUTION) { if(!WriteResolution(outf, pdb)) return ajFalse; } else ajFatal("Invalid mode in ajPdbWriteRecordRaw"); return ajTrue; } /* @func ajPdbWriteAllRaw ************************************************** ** ** Writes a pdb file for a protein. ** ** @param [r] mode [ajint] Either ajPdb or ajIDX if the original or ** corrected residue number is to be used. ** @param [r] pdb [const AjPPdb] Pdb object ** @param [w] outf [AjPFile] Output file stream ** @param [w] errf [AjPFile] Output file stream for error messages ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ AjBool ajPdbWriteAllRaw(ajint mode, const AjPPdb pdb, AjPFile outf, AjPFile errf) { ajint x; ajint y; /* Write bibliographic info. */ ajPdbWriteRecordRaw(ajHEADER, pdb, 0, 0, outf, NULL); ajPdbWriteRecordRaw(ajTITLE, pdb, 0, 0, outf, NULL); ajPdbWriteRecordRaw(ajCOMPND, pdb, 0, 0, outf, NULL); ajPdbWriteRecordRaw(ajSOURCE, pdb, 0, 0, outf, NULL); ajPdbWriteRecordRaw(ajEMPTYREMARK, pdb, 0, 0, outf, NULL); ajPdbWriteRecordRaw(ajRESOLUTION, pdb, 0, 0, outf, NULL); ajPdbWriteRecordRaw(ajEMPTYREMARK, pdb, 0, 0, outf, NULL); /* Write SEQRES records */ for(x=0;xNchn;x++) if(!ajPdbWriteRecordRaw(ajSEQRES_CHAIN, pdb, 0, x+1, outf, errf)) { ajWarn("Error writing file in ajPdbWriteAllRaw"); return ajFalse; } /* Loop for each model */ for(y=0;yNmod;y++) { /* Write the MODEL record */ if(pdb->Method == ajNMR) ajFmtPrintF(outf, "MODEL%9d%66s\n", y+1, " "); /* Write ATOM/HETATM records */ for(x=0;xNchn;x++) { if(mode == ajPDB) { if(!ajPdbWriteRecordRaw(ajATOMPDB_CHAIN, pdb, y+1, x+1, outf, NULL)) { ajWarn("Error writing file in ajPdbWriteAllRaw"); return ajFalse; } } else if(mode == ajIDX) { if(!ajPdbWriteRecordRaw(ajATOMIDX_CHAIN, pdb, y+1, x+1, outf, NULL)) { ajWarn("Error writing file in ajPdbWriteAllRaw"); return ajFalse; } } else ajFatal("Invalid mode in ajPdbWriteAllRaw"); if(!ajPdbWriteRecordRaw(ajHETEROGEN, pdb, y+1, 0, outf, NULL)) { ajWarn("Error writing file in ajPdbWriteAllRaw"); return ajFalse; } /* Write ENDMDL record */ if(pdb->Method == ajNMR) ajFmtPrintF(outf, "%-80s\n", "ENDMDL"); } } /* Write END record */ ajFmtPrintF(outf, "%-80s\n", "END"); return ajTrue; } /* @func ajPdbWriteDomainRaw *********************************************** ** ** Writes a pdb file for a SCOP domain. Where coordinates for multiple ** models (e.g. NMR structures) are given, data for model 1 are written. ** Coordinates are taken from a Pdb structure, domain definition is taken ** from a Scop structure. ** In the pdb file, the coordinates are presented as belonging to a single ** chain regardless of how many chains the domain comprised. ** Coordinates for heterogens are NOT written to file. ** ** @param [r] mode [ajint] Either ajPDB or ajIDX if the original or ** corrected residue number is to be used. ** @param [r] pdb [const AjPPdb] Pdb object ** @param [r] scop [const AjPScop] Scop object ** @param [w] outf [AjPFile] Output file stream ** @param [w] errf [AjPFile] Output file stream for error messages ** ** @return [AjBool] True on succcess ** @@ ****************************************************************************/ AjBool ajPdbWriteDomainRaw(ajint mode, const AjPPdb pdb, const AjPScop scop, AjPFile outf, AjPFile errf) { ajint z; /* A counter */ ajint chn; /* No. of the chain in the pdb structure */ if(!pdb || !scop || !outf || !errf) ajFatal("Bad args passed to ajPdbWriteDomainRaw"); /* Check for errors in chain identifier and length */ for(z=0;zN;z++) if(!ajPdbChnidToNum(scop->Chain[z], pdb, &chn)) { ajWarn("Chain incompatibility error in " "ajPdbWriteDomainRaw"); ajFmtPrintF(errf, "//\n%S\nERROR Chain incompatibility error " "in ajPdbWriteDomainRaw\n", scop->Entry); return ajFalse; } else if(pdb->Chains[chn-1]->Nres==0) { ajWarn("Chain length zero"); ajFmtPrintF(errf, "//\n%S\nERROR Chain length zero\n", scop->Entry); return ajFalse; } /* Write bibliographic info. */ ajPdbWriteDomainRecordRaw(ajHEADER_DOMAIN, NULL, 0, scop, outf, NULL); ajPdbWriteRecordRaw(ajTITLE, pdb, 0, 0, outf, NULL); ajPdbWriteRecordRaw(ajCOMPND, pdb, 0, 0, outf, NULL); ajPdbWriteRecordRaw(ajSOURCE, pdb, 0, 0, outf, NULL); ajPdbWriteRecordRaw(ajEMPTYREMARK, pdb, 0, 0, outf, NULL); ajPdbWriteRecordRaw(ajRESOLUTION, pdb, 0, 0, outf, NULL); ajPdbWriteRecordRaw(ajEMPTYREMARK, pdb, 0, 0, outf, NULL); /* Write SEQRES records */ if(!ajPdbWriteDomainRecordRaw(ajSEQRES_DOMAIN, pdb, 0, scop, outf, errf)) { ajWarn("Error writing file in ajPdbWriteDomainRaw"); return ajFalse; } /* Write MODEL record, if appropriate */ if(pdb->Method == ajNMR) ajFmtPrintF(outf, "MODEL%9d%66s\n", 1, " "); /* Write ATOM/HETATM records */ if(!WriteAtomDomain(errf, outf, pdb, scop, 1, mode)) { ajWarn("Error writing file in ajPdbWriteDomainRaw"); return ajFalse; } /* Write END/ENDMDL records */ if(pdb->Method == ajNMR) ajFmtPrintF(outf, "%-80s\n", "ENDMDL"); ajFmtPrintF(outf, "%-80s\n", "END"); return ajTrue; }