/* @source remap application ** ** Display a sequence with restriction cut sites ** ** @author Copyright (C) Gary Williams (gwilliam@hgmp.mrc.ac.uk) ** 18 Jan 2000 - GWW - written ** @@ ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the GNU General Public License ** as published by the Free Software Foundation; either version 2 ** of the License, or (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with this program; if not, write to the Free Software ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ******************************************************************************/ #include "emboss.h" #define ENZDATA "REBASE/embossre.enz" #define EQUDATA "embossre.equ" #define EQUGUESS 3500 /* Estimate of number of equivalent names */ #define TABLEGUESS 200 static void remap_read_equiv(AjPFile *equfile, AjPTable *table, AjBool commercial); static void remap_RemoveMinMax(AjPList restrictlist, AjPTable hittable, ajint mincuts, ajint maxcuts); static void remap_CutList(AjPFile outfile, const AjPTable hittable, AjBool isos, AjBool html, ajint mincuts, ajint maxcuts); static void remap_NoCutList(AjPFile outfile, const AjPTable hittable, AjBool html, const AjPStr enzymes, AjBool blunt, AjBool sticky, ajuint sitelen, AjBool commercial, AjBool ambiguity, AjBool limit, const AjPTable retable); static void remap_DelTable(AjPTable * table); static void remap_read_file_of_enzyme_names(AjPStr *enzymes); static int remap_cmpcase(const void* str1, const void* str2); static void remap_strdel(void** str, void* cl); static void remap_RenamePreferred(const AjPList list, const AjPTable table, 
AjPList newlist); static void remap_RestrictPreferred(const AjPList l, const AjPTable t); static AjBool remap_Ambiguous(const AjPStr str); static void remap_GetFrames(AjPStr const *framelist, AjBool *frames); /* @datastatic PValue ********************************************************* ** ** structure for counts and isoschizomers of a restriction enzyme hit ** ** @alias SValue ** @alias OValue ** ** @attr iso [AjPStr] Undocumented ** @attr count [ajint] Undocumented ** @attr Padding [char[4]] Padding to alignment boundary ******************************************************************************/ typedef struct SValue { AjPStr iso; ajint count; char Padding[4]; } OValue; #define PValue OValue* /* @prog remap **************************************************************** ** ** Display a sequence with restriction cut sites, translation etc ** ******************************************************************************/ int main(int argc, char **argv) { ajint begin, end; AjPSeqall seqall; AjPSeq seq; EmbPShow ss; AjPFile outfile; AjPStr tablename; ajint table; AjPRange uppercase; AjPRange highlight; AjBool threeletter; AjBool numberseq; AjBool nameseq; ajint width; ajint length; ajint margin; AjBool description; ajint offset; AjBool html; AjPStr descriptionline; ajint orfminsize; AjPTrn trnTable; AjBool translation; AjBool reverse; AjBool cutlist; AjBool flat; EmbPMatMatch mm = NULL; AjPStr *framelist; AjBool frames[6]; /* frames to be translated 1 to 3, -1 to -3 */ /* stuff for tables and lists of enzymes and hits */ ajint default_mincuts = 1; ajint default_maxcuts = 2000000000; AjPTable hittable; /* enzyme hits */ /* stuff lifted from Alan's 'restrict.c' */ AjPStr enzymes = NULL; ajint mincuts; ajint maxcuts; ajint sitelen; AjBool single; AjBool blunt; AjBool sticky; AjBool ambiguity; AjBool plasmid; AjBool commercial; AjBool limit; AjPFile enzfile = NULL; AjPFile equfile = NULL; AjPTable retable = NULL; ajint hits; AjPList restrictlist = NULL; 
embInit("remap", argc, argv); seqall = ajAcdGetSeqall("sequence"); outfile = ajAcdGetOutfile("outfile"); tablename = ajAcdGetListSingle("table"); uppercase = ajAcdGetRange("uppercase"); highlight = ajAcdGetRange("highlight"); threeletter = ajAcdGetBoolean("threeletter"); numberseq = ajAcdGetBoolean("number"); width = ajAcdGetInt("width"); length = ajAcdGetInt("length"); margin = ajAcdGetInt("margin"); nameseq = ajAcdGetBoolean("name"); description = ajAcdGetBoolean("description"); offset = ajAcdGetInt("offset"); html = ajAcdGetBoolean("html"); orfminsize = ajAcdGetInt("orfminsize"); translation = ajAcdGetBoolean("translation"); reverse = ajAcdGetBoolean("reverse"); cutlist = ajAcdGetBoolean("cutlist"); flat = ajAcdGetBoolean("flatreformat"); framelist = ajAcdGetList("frame"); /* restriction enzyme stuff */ mincuts = ajAcdGetInt("mincuts"); maxcuts = ajAcdGetInt("maxcuts"); sitelen = ajAcdGetInt("sitelen"); single = ajAcdGetBoolean("single"); blunt = ajAcdGetBoolean("blunt"); sticky = ajAcdGetBoolean("sticky"); ambiguity = ajAcdGetBoolean("ambiguity"); plasmid = ajAcdGetBoolean("plasmid"); commercial = ajAcdGetBoolean("commercial"); limit = ajAcdGetBoolean("limit"); enzymes = ajAcdGetString("enzymes"); if(!blunt && !sticky) ajFatal("Blunt/Sticky end cutters shouldn't both be disabled."); /* get the number of the genetic code used */ ajStrToInt(tablename, &table); trnTable = ajTrnNewI(table); /* read the local file of enzymes names */ remap_read_file_of_enzyme_names(&enzymes); /* get the frames to be translated */ remap_GetFrames(framelist, frames); while(ajSeqallNext(seqall, &seq)) { /* get begin and end positions */ begin = ajSeqGetBegin(seq)-1; end = ajSeqGetEnd(seq)-1; /* do the name and description */ if(nameseq) { if(html) ajFmtPrintF(outfile, "
");
/*
** NOTE(review): the ajFmtPrintF call that ends on the line above is cut off
** inside its string literal in this copy of the file.  Nothing visible in
** this function creates ss, restrictlist, hittable or retable, although all
** four are used and released below - the missing text presumably did that.
** Confirm against a complete copy of the file before changing this loop.
*/
/* create the format to display */
embShowAddBlank(ss);
/* restriction sites: called with 1 here and with -1 in the reverse block */
embShowAddRE(ss, 1, restrictlist, plasmid, flat);
embShowAddSeq(ss, numberseq, threeletter, uppercase, highlight);
/* tick numbers are only added when the sequence is not being numbered */
if(!numberseq)
embShowAddTicknum(ss);
embShowAddTicks(ss);
/* reverse display: complement line followed by a second set of RE sites */
if(reverse)
{
embShowAddComp(ss, numberseq);
embShowAddRE(ss, -1, restrictlist, plasmid, flat);
}
if(translation)
{
if(reverse)
embShowAddBlank(ss);
/* frames[0], frames[1], frames[2] switch on translation frames 1, 2, 3 */
if(frames[0])
embShowAddTran(ss, trnTable, 1, threeletter,
numberseq, NULL, orfminsize,
AJFALSE, AJFALSE, AJFALSE, AJFALSE);
if(frames[1])
embShowAddTran(ss, trnTable, 2, threeletter,
numberseq, NULL, orfminsize,
AJFALSE, AJFALSE, AJFALSE, AJFALSE);
if(frames[2])
embShowAddTran(ss, trnTable, 3, threeletter,
numberseq, NULL, orfminsize,
AJFALSE, AJFALSE, AJFALSE, AJFALSE);
if(reverse)
{
embShowAddTicks(ss);
/* frames[5], frames[4], frames[3] switch on frames -3, -2, -1 */
/* and are added in that order */
if(frames[5])
embShowAddTran(ss, trnTable, -3, threeletter,
numberseq, NULL, orfminsize,
AJFALSE, AJFALSE, AJFALSE, AJFALSE);
if(frames[4])
embShowAddTran(ss, trnTable, -2, threeletter,
numberseq, NULL, orfminsize,
AJFALSE, AJFALSE, AJFALSE, AJFALSE);
if(frames[3])
embShowAddTran(ss, trnTable, -1, threeletter,
numberseq, NULL, orfminsize,
AJFALSE, AJFALSE, AJFALSE, AJFALSE);
}
}
embShowPrint(outfile, ss);
/* display a list of the Enzymes that cut and don't cut */
/* note: the 'limit' option is what remap_CutList receives as 'isos' */
if(cutlist)
{
remap_CutList(outfile, hittable,
limit, html, mincuts, maxcuts);
remap_NoCutList(outfile, hittable, html, enzymes, blunt,
sticky, sitelen, commercial, ambiguity,
limit, retable);
}
/* add a gratuitous newline at the end of the sequence */
ajFmtPrintF(outfile, "\n");
/* tidy up */
/* per-sequence objects: each match is popped and deleted before the */
/* list itself is freed, then both tables are released */
embShowDel(&ss);
while(ajListPop(restrictlist,(void **)&mm))
embMatMatchDel(&mm);
ajListFree(&restrictlist);
remap_DelTable(&hittable);
ajTablestrFree(&retable);
}
/* objects that live for the whole run are released after the loop */
ajTrnDel(&trnTable);
ajSeqallDel(&seqall);
ajSeqDel(&seq);
ajFileClose(&outfile);
ajStrDel(&tablename);
ajStrDel(&enzymes);
ajStrDelarray(&framelist);
ajRangeDel(&uppercase);
ajRangeDel(&highlight);
embExit();
return 0;
}
/* @funcstatic remap_DelTable *************************************************
**
** Release a hit table whose values are PValue structures.
**
** For every entry the isoschizomer string held in the value, the value
** structure itself and the key string are freed.  The table is then freed
** with ajTableFree, whether or not it had any entries.
**
** @param [d] table [AjPTable *] table to delete
** @return [void]
** @@
******************************************************************************/
static void remap_DelTable(AjPTable * table)
{
    void **keys = NULL;		/* keys filled in by ajTableToarray */
    void **vals = NULL;		/* values, read at the same index as keys */
    PValue hit;
    ajint n = 0;

    if(ajTableGetLength(*table))
    {
	ajTableToarray(*table, &keys, &vals);

	/* the walk stops at the first NULL key */
	while(keys[n])
	{
	    hit = (PValue) vals[n];
	    ajStrDel(&(hit->iso));
	    AJFREE(vals[n]);		/* the PValue structure itself */
	    ajStrDel((AjPStr*)&keys[n]);
	    n++;
	}

	AJFREE(keys);
	AJFREE(vals);
    }

    ajTableFree(table);

    return;
}
/* @funcstatic remap_RemoveMinMax *********************************************
**
** Count the hits of every enzyme in restrictlist and drop the matches of
** enzymes whose count is below mincuts or above maxcuts.
**
** Pass 1 fills hittable: one entry per enzyme name (m->cod) holding the
** number of matches seen and a copy of the isoschizomer string of the first
** match seen for that name.
** Pass 2 removes from restrictlist, and deletes, every match whose enzyme
** count falls outside the range mincuts..maxcuts.
**
** Entries are kept in hittable for all enzymes, including those whose
** matches are removed from the list.
**
** @param [u] restrictlist [AjPList] List to prune
** @param [u] hittable [AjPTable] table of number of hits for each enzyme
** @param [r] mincuts [ajint] mincuts
** @param [r] maxcuts [ajint] maxcuts
** @return [void]
** @@
******************************************************************************/
static void remap_RemoveMinMax(AjPList restrictlist,
			       AjPTable hittable, ajint mincuts, ajint maxcuts)
{
    AjIList iter;		/* iterator over the match list */
    EmbPMatMatch match = NULL;	/* current restriction enzyme match */
    PValue tally;
    AjPStr probe = NULL;	/* scratch copy of the name used for lookups */
    AjPStr stored = NULL;	/* key string handed over to hittable */

    probe = ajStrNew();

    /* nothing to count or prune when there are no hits */
    if(!ajListGetLength(restrictlist))
    {
	ajStrDel(&probe);
	return;
    }

    /* pass 1: count the matches of each enzyme */
    iter = ajListIterNewread(restrictlist);

    for(match = ajListIterGet(iter); match != NULL;
	match = ajListIterGet(iter))
    {
	ajStrAssignS(&probe, match->cod);

	tally = (PValue) ajTableFetch(hittable, (const void *)probe);

	if(tally != NULL)
	{
	    tally->count++;
	    continue;
	}

	/* first match for this enzyme: create its table entry */
	AJNEW0(tally);
	tally->count = 1;
	tally->iso = ajStrNew();
	ajStrAssignS(&(tally->iso), match->iso);
	stored = ajStrNew();
	ajStrAssignS(&stored,probe);
	ajTablePut(hittable, (void *)stored, (void *)tally);
    }

    ajListIterDel(&iter);

    /* pass 2: drop the matches of enzymes outside the allowed range */
    iter = ajListIterNew(restrictlist);

    for(match = ajListIterGet(iter); match != NULL;
	match = ajListIterGet(iter))
    {
	tally = (PValue) ajTableFetch(hittable, (const void *)(match->cod));

	if(tally->count >= mincuts && tally->count <= maxcuts)
	    continue;

	ajListIterRemove(iter);
	embMatMatchDel(&match);
    }

    ajListIterDel(&iter);

    ajStrDel(&probe);

    return;
}
/* @funcstatic remap_CutList **************************************************
**
** Display the enzymes that cut, in three sections:
**   1. enzymes whose hit count lies within mincuts..maxcuts
**   2. enzymes that cut fewer than mincuts times
**   3. enzymes that cut more than maxcuts times
**
** Each section has a title and, when hittable is not empty, one line per
** enzyme in sorted name order giving the name, the hit count and the
** isoschizomer string.  The isos flag only adds the "Isoschizomers" column
** heading to the titles; the isoschizomer string is printed either way.
**
** The count for a name is always looked up in hittable by key.  The key
** array is sorted after ajTableToarray(), so the value array no longer lines
** up with it by index and must not be read with the same index.
**
** NOTE(review): the HTML markup strings (H2 around the titles, PRE around
** the listings) and the start of the "< MINCUTS" section had been lost from
** the copy of the file this was reviewed in.  They are reconstructed here -
** confirm the exact tags against the upstream source.
**
** @param [u] outfile [AjPFile] file to print to.
** @param [r] hittable [const AjPTable] table of number of hits for each enzyme
** @param [r] isos [AjBool] True if allow isoschizomers
** @param [r] html [AjBool] dump out html if true.
** @param [r] mincuts [ajint] min required cuts
** @param [r] maxcuts [ajint] max required cuts
** @return [void]
** @@
******************************************************************************/
static void remap_CutList(AjPFile outfile, const AjPTable hittable,
			  AjBool isos,
			  AjBool html, ajint mincuts, ajint maxcuts)
{
    PValue value;
    void **keyarray = NULL;	/* array for table */
    void **valarray = NULL;	/* array for table */
    ajint i;

    /* print title */
    if(html)
	ajFmtPrintF(outfile, "<H2>");

    ajFmtPrintF(outfile, "\n\n# Enzymes that cut Frequency");

    if(isos)
	ajFmtPrintF(outfile, "\tIsoschizomers\n");
    else
	ajFmtPrintF(outfile, "\n");

    if(html)
	ajFmtPrintF(outfile, "</H2>\n");

    if(ajTableGetLength(hittable))
    {
	ajTableToarray(hittable, &keyarray, &valarray);

	/* from here on keyarray[i] and valarray[i] are unrelated */
	qsort(keyarray, ajTableGetLength(hittable), sizeof (*keyarray),
	      ajStrVcmp);

	/* enzymes that cut the required number of times */
	if(html)
	    ajFmtPrintF(outfile, "<PRE>");

	for(i = 0; keyarray[i]; i++)
	{
	    value = (PValue) ajTableFetch(hittable,keyarray[i]);

	    if(value->count >= mincuts && value->count <= maxcuts)
		ajFmtPrintF(outfile, "%10S\t %d\t%S\n",
			    (AjPStr) keyarray[i], value->count,
			    value->iso);
	}

	ajFmtPrintF(outfile, "\n");

	if(html)
	    ajFmtPrintF(outfile, "</PRE>\n");
    }

    /* enzymes that cut <mincuts */
    /* print title */
    if(html)
	ajFmtPrintF(outfile, "<H2>");

    ajFmtPrintF(outfile, "\n\n# Enzymes which cut less frequently than the ");
    ajFmtPrintF(outfile, "MINCUTS criterion\n# Enzymes < MINCUTS Frequency");

    if(isos)
	ajFmtPrintF(outfile, "\tIsoschizomers\n");
    else
	ajFmtPrintF(outfile, "\n");

    if(html)
	ajFmtPrintF(outfile, "</H2>\n");

    if(ajTableGetLength(hittable))
    {
	/* print out results */
	if(html)
	    ajFmtPrintF(outfile, "<PRE>");

	for(i = 0; keyarray[i]; i++)
	{
	    value = (PValue) ajTableFetch(hittable,keyarray[i]);

	    if(value->count < mincuts)
		ajFmtPrintF(outfile, "%10S\t %d\t%S\n",
			    (AjPStr) keyarray[i], value->count,
			    value->iso);
	}

	ajFmtPrintF(outfile, "\n");

	if(html)
	    ajFmtPrintF(outfile, "</PRE>\n");
    }

    /* enzymes that cut >maxcuts */
    /* print title */
    if(html)
	ajFmtPrintF(outfile, "<H2>");

    ajFmtPrintF(outfile, "\n\n# Enzymes which cut more frequently than the ");
    ajFmtPrintF(outfile, "MAXCUTS criterion\n# Enzymes > MAXCUTS Frequency");

    if(isos)
	ajFmtPrintF(outfile, "\tIsoschizomers\n");
    else
	ajFmtPrintF(outfile, "\n");

    if(html)
	ajFmtPrintF(outfile, "</H2>\n");

    if(ajTableGetLength(hittable))
    {
	/* print out results */
	if(html)
	    ajFmtPrintF(outfile, "<PRE>");

	for(i = 0; keyarray[i]; i++)
	{
	    /*
	    ** Look the value up by key, as the two sections above do.
	    ** Reading valarray[i] here would pair the sorted name with
	    ** the count and isoschizomers of some other enzyme.
	    */
	    value = (PValue) ajTableFetch(hittable,keyarray[i]);

	    if(value->count > maxcuts)
		ajFmtPrintF(outfile, "%10S\t %d\t%S\n",
			    (AjPStr) keyarray[i], value->count,
			    value->iso);
	}

	ajFmtPrintF(outfile, "\n");

	if(html)
	    ajFmtPrintF(outfile, "</PRE>\n");

	/* only the arrays are freed; keys and values stay in the table */
	AJFREE(keyarray);
	AJFREE(valarray);
    }

    return;
}
/* @funcstatic remap_NoCutList ************************************************
**
** display a list of the enzymes that do NOT cut
**
** Visible steps: build 'cutlist' from the keys of hittable plus every name
** in each hit's isoschizomer string; filter the enzymes being read against
** the selection criteria, counting the rejected ones, and collect the rest
** in 'nocutlist'; print nocutlist and then the rejected count.
**
** NOTE(review): in this copy of the file the body is truncated in two
** places - the 'for(i=0; i...' statement in the explicit-enzyme-list branch
** and the 'for(i=0; i...' statement after the enzyme file is closed are
** both cut off mid-expression.  The code that opens the enzyme file, reads
** each 'enz' record and removes cutters from nocutlist is therefore not
** visible and could not be reviewed.
**
** NOTE(review): the printing loop tests 'i++ == 7', so the line break is
** written after every eighth name, although the inline comment says seven.
**
** NOTE(review): the heading string near the end spells 'citeria'.
**
** @param [u] outfile [AjPFile] file to print to.
** @param [r] hittable [const AjPTable] Enzymes that cut
** @param [r] html [AjBool] dump out html if true.
** @param [r] enzymes [const AjPStr] names of enzymes to search for or 'all'
** @param [r] blunt [AjBool] Allow blunt cutters
** @param [r] sticky [AjBool] Allow sticky cutters
** @param [r] sitelen [ajuint] minimum length of recognition site
** @param [r] commercial [AjBool] Allow commercially supplied cutters
** @param [r] ambiguity [AjBool] Allow ambiguous patterns
** @param [r] limit [AjBool] True if allow isoschizomers
** @param [r] retable [const AjPTable] Table from embossre.equ file
** @return [void]
** @@
******************************************************************************/
static void remap_NoCutList(AjPFile outfile, const AjPTable hittable,
AjBool html, const AjPStr enzymes, AjBool blunt,
AjBool sticky, ajuint sitelen, AjBool commercial,
AjBool ambiguity, AjBool limit,
const AjPTable retable)
{
/* for iterating over hittable */
PValue value;
void **keyarray = NULL; /* array for table */
void **valarray = NULL; /* array for table */
ajint i;
/* list of enzymes that cut */
AjPList cutlist;
AjIList citer; /* iterator for cutlist */
AjPStr cutname = NULL;
AjBool found;
/* for parsing value->iso string */
AjPStrTok tok;
char tokens[] = " ,";
AjPStr code = NULL;
const char *p;
/* for reading in enzymes names */
AjPFile enzfile = NULL;
AjPStr *ea;
ajint ne; /* number of enzymes */
AjBool isall = ajTrue;
/* list of enzymes that don't cut */
AjPList nocutlist;
AjIList niter; /* iterator for nocutlist */
AjPStr nocutname = NULL;
/* count of rejected enzymes not matching criteria */
ajint rejected_count = 0;
EmbPPatRestrict enz;
/* for renaming preferred isoschizomers */
AjPList newlist;
/*
**
** Make a list of enzymes('cutlist') that hit
** including the isoschizomer names
**
*/
ajDebug("Make a list of all enzymes that cut\n");
cutlist = ajListstrNew();
nocutlist = ajListstrNew();
/* keys and values are read at the same index; nothing reorders them here */
ajTableToarray(hittable, &keyarray, &valarray);
/* the loop stops at the first NULL key */
for(i = 0; keyarray[i]; i++)
{
value = (PValue) valarray[i];
/* the enzyme name itself goes on the list first */
cutname = ajStrNew();
ajStrAssignRef(&cutname, keyarray[i]);
ajListstrPushAppend(cutlist, cutname);
/* Add to cutlist all isoschizomers of enzymes that cut */
ajDebug("Add to cutlist all isoschizomers of enzymes that cut\n");
/* start token to parse isoschizomers names */
/* names in value->iso are separated by spaces and/or commas */
tok = ajStrTokenNewC(value->iso, tokens);
while(ajStrTokenNextParseC(&tok, tokens, &code))
{
/* each name gets its own string, which is handed to the list */
cutname = ajStrNew();
ajStrAssignS(&cutname, code);
ajListstrPushAppend(cutlist, cutname);
}
ajStrTokenDel(&tok);
}
ajStrDel(&code);
/* only the arrays are freed; the table keeps its keys and values */
AJFREE(keyarray);
AJFREE(valarray);
/*
** Read in list of enzymes ('nocutlist') - either all or
** the input enzyme list.
** Exclude those that don't match the selection criteria - count these.
*/
ajDebug("Read in a list of all input enzyme names\n");
ne = 0;
/* no enzyme string at all is treated the same as 'all' */
if(!enzymes)
isall = ajTrue;
else
{
/* get input list of enzymes into ea[] */
ne = ajArrCommaList(enzymes, &ea);
/* only the first name is compared with "all" */
if(ajStrMatchCaseC(ea[0], "all"))
isall = ajTrue;
else
{
isall = ajFalse;
for(i=0; icod))
{
found = AJTRUE;
break;
}
if(!found) /* not in the explicit list */
continue;
ajDebug("RE %S is in the input explicit list of REs\n", enz->cod);
}
/* ignore ncuts==0 as they are unknown */
if(!enz->ncuts)
{
/* number of cut positions */
ajDebug("RE %S has an unknown number of cut positions\n",
enz->cod);
continue;
}
ajDebug("RE %S has a known number of cut sites\n", enz->cod);
if(enz->len < sitelen)
{
/* recognition site length */
ajDebug("RE %S does not have a long enough recognition site\n",
enz->cod);
rejected_count++;
continue;
}
if(!blunt && enz->blunt)
{
/* blunt/sticky */
ajDebug("RE %S is blunt\n", enz->cod);
rejected_count++;
continue;
}
if(!sticky && !enz->blunt)
{
/* blunt/sticky */
ajDebug("RE %S is sticky\n", enz->cod);
rejected_count++;
continue;
}
/* commercially available enzymes have uppercase patterns */
p = ajStrGetPtr(enz->pat);
/*
** The -commercial qualifier is only used if we are searching
** through 'all' of the REBASE database - if we have specified an
** explicit list of enzymes then they are searched for whether or
** not they are commercially available
*/
if((*p >= 'a' && *p <= 'z') && commercial && isall)
{
ajDebug("RE %S is not commercial\n", enz->cod);
rejected_count++;
continue;
}
if(!ambiguity && remap_Ambiguous(enz->pat)) {
ajDebug("RE %S is ambiguous\n", enz->cod);
rejected_count++;
continue;
}
ajDebug("RE %S matches all required criteria\n", enz->cod);
code = ajStrNew();
ajStrAssignS(&code, enz->cod);
ajListstrPushAppend(nocutlist, code);
}
embPatRestrictDel(&enz);
ajFileClose(&enzfile);
for(i=0; i");
ajFmtPrintF(outfile, "\n\n# Enzymes that do not cut\n\n");
if(html)
ajFmtPrintF(outfile, "\n");
if(html)
ajFmtPrintF(outfile, "");
/* ajListSort(nocutlist, ajStrVcmp);*/
niter = ajListIterNewread(nocutlist);
i = 0;
while((nocutname = (AjPStr)ajListIterGet(niter)) != NULL)
{
ajFmtPrintF(outfile, "%-10S", nocutname);
/* new line after every 7 names printed */
if(i++ == 7)
{
ajFmtPrintF(outfile, "\n");
i = 0;
}
}
ajListIterDel(&niter);
/* end the output */
ajFmtPrintF(outfile, "\n");
if(html) {ajFmtPrintF(outfile, "\n");}
/*
** Print the count of rejected enzymes
** N.B. This is the count of ALL rejected enzymes including all
** isoschizomers
*/
if(html)
ajFmtPrintF(outfile, "");
ajFmtPrintF(outfile,
"\n\n# No. of cutting enzymes which do not match the\n"
"# SITELEN, BLUNT, STICKY, COMMERCIAL, AMBIGUOUS citeria\n\n");
if(html)
ajFmtPrintF(outfile, "
\n");
ajFmtPrintF(outfile, "%d\n", rejected_count);
ajDebug("Tidy up\n");
ajListstrFreeData(&nocutlist);
ajListstrFreeData(&cutlist);
return;
}
/* @funcstatic remap_read_equiv ***********************************************
**
** Lifted from Alan's restrict.c - reads the embossre.equ file.
**
** Each data line is split on spaces, tabs and newlines.  The first field
** becomes the table key and the second field the table value.  Lines that
** are empty or start with '#' or '!' are skipped.  Lines that do not yield
** two fields are skipped as well, so that a NULL token is never turned into
** a string.
**
** When commercial is false, trailing '*' characters are trimmed from the
** value before it is stored.
**
** The table takes over the key and value strings.  The file is closed
** before returning.
**
** @param [u] equfile [AjPFile*] file to read then close.
** @param [wP] table [AjPTable*] table to write to.
** @param [r] commercial [AjBool] supplier test for asterisk removal
** @return [void]
** @@
******************************************************************************/
static void remap_read_equiv(AjPFile *equfile, AjPTable *table,
			     AjBool commercial)
{
    AjPStr line;
    AjPStr key;
    AjPStr value;
    const char *p;

    line = ajStrNew();

    while(ajReadlineTrim(*equfile,&line))
    {
	p = ajStrGetPtr(line);

	if(!*p || *p=='#' || *p=='!')
	    continue;

	p = ajSysFuncStrtok(p," \t\n");

	if(!p)			/* no field at all on this line */
	    continue;

	/* the key is copied before the next token is requested */
	key = ajStrNewC(p);

	p = ajSysFuncStrtok(NULL," \t\n");

	if(!p)			/* a name without an equivalent: skip it */
	{
	    ajStrDel(&key);
	    continue;
	}

	value = ajStrNewC(p);

	if(!commercial)
	    ajStrTrimEndC(&value,"*");

	ajTablePut(*table,(void *)key, (void *)value);
    }

    ajFileClose(equfile);
    ajStrDel(&line);

    return;
}
/* @funcstatic remap_read_file_of_enzyme_names ********************************
**
** If the list of enzymes starts with a '@' it opens that file, reads in
** the list of enzyme names and replaces the input string with the enzyme names
**
** The '@' is trimmed off and the remainder is used as the file name.  Lines
** that are empty or start with '#' or '!' are ignored.  Every other line is
** appended to the result followed by a comma, so the resulting string ends
** with a comma.
**
** A string that does not start with '@' is left untouched.  If the file
** cannot be opened, ajFatal is called with the file name.
**
** @param [w] enzymes [AjPStr*] names of enzymes to search for or 'all' or
**                              '@file'
** @return [void]
** @@
******************************************************************************/
static void remap_read_file_of_enzyme_names(AjPStr *enzymes)
{
    AjPFile file = NULL;
    AjPStr line;
    const char *p = NULL;

    if(ajStrFindC(*enzymes, "@") == 0)
    {
	ajStrTrimC(enzymes, "@");	/* remove the @ */
	file = ajFileNewInNameS(*enzymes);

	/* %S takes the string itself, so pass *enzymes, not the AjPStr* */
	if(file == NULL)
	    ajFatal("Cannot open the file of enzyme names: '%S'", *enzymes);

	/* blank off the enzyme file name and replace with the enzyme names */
	ajStrSetClear(enzymes);
	line = ajStrNew();

	while(ajReadlineTrim(file, &line))
	{
	    p = ajStrGetPtr(line);

	    if(!*p || *p == '#' || *p == '!')
		continue;

	    ajStrAppendS(enzymes, line);
	    ajStrAppendC(enzymes, ",");
	}

	ajStrDel(&line);
	ajFileClose(&file);
    }

    return;
}
/* @funcstatic remap_cmpcase *********************************************
**
** Compares the value of two strings for use in sorting (e.g. ajListSort)
** Case Independent!
**
** Both arguments are pointers to AjPStr objects, as a sort routine passes
** pointers to the elements being compared.  Characters are compared after
** upper-casing.  If one string is a prefix of the other, the shorter one
** sorts first.
**
** Each character is converted to unsigned char before it is given to
** toupper(), as the C standard requires for values outside the basic
** character set.
**
** @param [r] str1 [const void*] First string
** @param [r] str2 [const void*] Second string
** @return [int] -1 if first string should sort before second, +1 if the
**         second string should sort first. 0 if they are identical
**         in length and content.
** @@
******************************************************************************/
static int remap_cmpcase(const void* str1, const void* str2)
{
    const char* cp = (*(AjPStr const *)str1)->Ptr;
    const char* cq = (*(AjPStr const *)str2)->Ptr;
    int up1;
    int up2;

    for(; *cp && *cq; cp++, cq++)
    {
	up1 = toupper((unsigned char) *cp);
	up2 = toupper((unsigned char) *cq);

	if(up1 != up2)
	    return (up1 > up2) ? 1 : -1;
    }

    /* equal so far: whichever string still has characters sorts last */
    if(*cp)
	return 1;

    if(*cq)
	return -1;

    return 0;
}
/* @funcstatic remap_strdel *********************************************
**
** Deletes a string when called by ajListUnique
**
** @param [r] str [void**] string to delete
** @param [r] cl [void*] not used
** @return [void]
** @@
******************************************************************************/
static void remap_strdel(void** str, void* cl)
{
(void) cl; /* make it used */
ajStrDel((AjPStr*)str);
return;
}
/* @funcstatic remap_RenamePreferred ******************************************
**
** Iterates through a list of strings.
** For each string it checks whether that string occurs as a key in a table.
** If it does, a copy of the table value is appended to the output list;
** otherwise a copy of the original string is appended.
**
** The input list is only read.  Every string appended to newlist is newly
** allocated here.
**
** @param [r] list [const AjPList] Input list of strings
** @param [r] table [const AjPTable] Table of replacements
** @param [u] newlist [AjPList] Returned new list of strings
** @return [void]
** @@
******************************************************************************/
static void remap_RenamePreferred(const AjPList list, const AjPTable table,
				  AjPList newlist)
{
    AjIList walker = NULL;
    AjPStr oldname = NULL;
    AjPStr preferred = NULL;
    AjPStr copy = NULL;

    walker = ajListIterNewread(list);

    for(oldname = (AjPStr)ajListIterGet(walker); oldname;
	oldname = (AjPStr)ajListIterGet(walker))
    {
	preferred = ajTableFetch(table, oldname);
	copy = ajStrNew();

	if(!preferred)
	{
	    /* no replacement known: keep the old name */
	    ajDebug("Rename: %S not found\n", oldname);
	    ajStrAssignS(&copy, oldname);
	}
	else
	{
	    /* replacement found: use the table value */
	    ajDebug("Rename: %S renamed to %S\n", oldname, preferred);
	    ajStrAssignS(&copy, preferred);
	}

	ajListstrPushAppend(newlist, copy);
    }

    ajListIterDel(&walker);

    return;
}
/* @funcstatic remap_RestrictPreferred ***************************************
**
** Replace RE names by the name of the prototype for that RE.
** This is derived from embPatRestrictPreferred.  It differs in that it also
** rewrites the isoschizomer list: wherever the prototype name appears in
** the list, it is replaced by the name that is being changed in m->cod.
** For example, a name of X with prototype name B and isoschizomer list
** "A, B, C" becomes a name of B with isoschizomer list "A,X,C".
** If the prototype name is not found in the isoschizomer list, the old
** name is appended to the list instead.
**
** Hits whose name has no entry in the table are left unchanged.
** The rebuilt isoschizomer list is joined with single commas.
**
** @param [r] l [const AjPList] list of EmbPMatMatch hits
** @param [r] t [const AjPTable] table from embossre.equ file
**
** @return [void]
** @@
******************************************************************************/
static void remap_RestrictPreferred(const AjPList l, const AjPTable t)
{
    AjIList hits = NULL;
    EmbPMatMatch hit = NULL;
    AjPStr proto = NULL;	/* prototype name from the table */
    AjPStr rebuilt = NULL;	/* isoschizomer list being assembled */
    AjBool swapped;		/* prototype name was seen in the list */

    /* for parsing the hit->iso string */
    AjPStrTok handle = NULL;
    char seps[] = " ,";
    AjPStr name = NULL;

    hits = ajListIterNewread(l);

    while((hit = (EmbPMatMatch)ajListIterGet(hits)))
    {
	swapped = ajFalse;

	/* get prototype name; hits without one are left alone */
	proto = ajTableFetch(t, hit->cod);

	if(!proto)
	    continue;

	ajStrAssignC(&rebuilt, "");

	/* walk the isoschizomer names of this hit */
	ajStrTokenDel(&handle);
	handle = ajStrTokenNewC(hit->iso, seps);

	while(ajStrTokenNextParseC(&handle, seps, &name))
	{
	    if(ajStrGetLen(rebuilt) > 0)
		ajStrAppendC(&rebuilt, ",");

	    if(ajStrCmpCaseS(name, proto) == 0)
	    {
		/* the prototype's slot is taken by the old name */
		ajStrAppendS(&rebuilt, hit->cod);
		swapped = ajTrue;
	    }
	    else
		ajStrAppendS(&rebuilt, name);
	}

	ajStrTokenDel(&handle);

	/* the old name must end up in the list one way or another */
	if(!swapped)
	{
	    if(ajStrGetLen(rebuilt) > 0)
		ajStrAppendC(&rebuilt, ",");

	    ajStrAppendS(&rebuilt, hit->cod);
	}

	ajDebug("RE: %S -> %S iso=%S newiso=%S\n", hit->cod, proto,
		hit->iso, rebuilt);

	/* the list is replaced first, then the name, as hit->cod is */
	/* still needed while the list is being built */
	ajStrAssignS(&hit->iso, rebuilt);
	ajStrAssignS(&hit->cod, proto);
    }

    ajListIterDel(&hits);
    ajStrDel(&rebuilt);
    ajStrDel(&name);
    ajStrTokenDel(&handle);

    return;
}
/* @funcstatic remap_Ambiguous ***************************************
**
** Tests whether there are ambiguity base codes in a string
**
** @param [r] str [const AjPStr] String to test
**
** @return [AjBool] True is ambiguous bases found
** @@
******************************************************************************/
static AjBool remap_Ambiguous(const AjPStr str)
{
ajuint ipos;
char chr;
for (ipos=0; ipos