/* @source embaln.c ** ** General routines for alignment. ** Copyright (c) 1999 Alan Bleasby ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the GNU General Public License ** as published by the Free Software Foundation; either version 2 ** of the License, or (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with this program; if not, write to the Free Software ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ******************************************************************************/ #include "emboss.h" #include #include #include #define GAPO 26 #define GAPE 27 #define DIAG 0 #define LEFT 1 #define DOWN 2 /* @func embAlignPathCalc ***************************************************** ** ** Create path matrix for Needleman-Wunsch ** Nucleotides or proteins as needed. ** ** @param [r] a [const char *] first sequence ** @param [r] b [const char *] second sequence ** @param [r] lena [ajint] length of first sequence ** @param [r] lenb [ajint] length of second sequence ** @param [r] gapopen [float] gap opening penalty ** @param [r] gapextend [float] gap extension penalty ** @param [w] path [float *] path matrix ** @param [r] sub [float * const *] substitution matrix from AjPMatrixf ** @param [r] cvt [const AjPSeqCvt] Conversion array for AjPMatrixf ** @param [w] compass [ajint *] Path direction pointer array ** @param [r] show [AjBool] Display path matrix ** ** @return [float] Maximum score ** @@ ** Optimised to keep a maximum value to avoid looping down or left ** to find the maximum. (il 29/07/99) ** ******************************************************************************/ float embAlignPathCalc(const char *a, const char *b, ajint lena, ajint lenb, float gapopen, float gapextend, float *path, float * const *sub, const AjPSeqCvt cvt, ajint *compass, AjBool show) { ajint xpos; ajint ypos; ajint i; ajint j; float match; float mscore; float fnew; float *maxa; float maxb; static AjPStr outstr = NULL; char compasschar; float ret = -FLT_MAX; ajDebug("embAlignPathCalc\n"); /* Create stores for the maximum values in a row or column */ maxa = AJALLOC(lena*sizeof(float)); /* First initialise the first column and row */ for(i=0;i 1) { maxa[ypos] -= gapextend; fnew=path[(ypos)*lenb+xpos-1]; fnew-=gapopen; if(fnew > maxa[ypos]) maxa[ypos] = fnew; if( maxa[ypos] > mscore) { mscore = maxa[ypos]; path[ypos*lenb+xpos] = mscore; compass[ypos*lenb+xpos] = 1; /* Score comes from left */ } } /* And then bimble down Y axis */ if(ypos>1) { maxb -= gapextend; fnew = path[(ypos-1)*lenb+xpos]; fnew-=gapopen; if(fnew > maxb) maxb = fnew; if(maxb > mscore) { mscore = maxb; path[ypos*lenb+xpos] = mscore; compass[ypos*lenb+xpos] = 2; /* Score comes from bottom */ } } ajDebug("\n"); ypos++; } ++xpos; } for(i=0;iret) ret = path[(lena-1)*lenb+i]; for(j=0;jret) ret=path[j*lenb+lenb-1]; if(show) { for(i=lena-1;i>-1;--i) { ajFmtPrintS(&outstr, "%6d ", i); for(j=0;j 0.) ? result : (float)0.; compass[i*lenb] = 0; maxa[i] = path[i*lenb]-gapopen; } for(j=0;j 0.) ? result : (float)0.; compass[j] = 0; } /* xpos and ypos are the diagonal steps so start at 1 */ xpos = 1; while(xpos!=lenb) { ypos = 1; bx = path[xpos]-gapopen-gapextend; while(ypos < lena) { /* get match for current xpos/ypos */ match = sub[ajSeqcvtGetCodeK(cvt,a[ypos])] [ajSeqcvtGetCodeK(cvt,b[xpos])]; /* Get diag score */ mscore = path[(ypos-1)*lenb+xpos-1] + match; ajDebug("xpos:%d ypos:%d mscore: %.2f\n", xpos, ypos, mscore); /* ajDebug("Opt %d %6.2f ",ypos*lenb+xpos,mscore); */ /* Set compass to diagonal value 0 */ compass[ypos*lenb+xpos] = 0; path[ypos*lenb+xpos] = mscore; /* Now parade back along X axis */ if(xpos > 1) { maxa[ypos] -= gapextend; fnew=path[(ypos)*lenb+xpos-1]; fnew-=gapopen; ajDebug("Xtest: fnew:%.2f maxa[%d] %.2f\n", fnew, ypos, maxa[ypos]); if(fnew > maxa[ypos]) maxa[ypos] = fnew; if( maxa[ypos] > mscore) { mscore = maxa[ypos]; path[ypos*lenb+xpos] = mscore; compass[ypos*lenb+xpos] = 1; /* Score comes from left */ ajDebug("Xused: fnew:%.2f maxa[%d] %.2f mscore:%.2f\n", fnew, ypos, maxa[ypos],mscore); } } /* And then bimble down Y axis */ if(ypos > 1) { bx -= gapextend; fnew = path[(ypos-1)*lenb+xpos]; fnew-=gapopen; if(fnew > bx) bx = fnew; if(bx > mscore) { mscore = bx; path[ypos*lenb+xpos] = mscore; compass[ypos*lenb+xpos] = 2; /* Score comes from bottom */ } } if(mscore > ret) ret = mscore; result = path[ypos*lenb+xpos]; if(result < 0.) path[ypos*lenb+xpos] = 0.; ypos++; } ++xpos; } if(show) { for(i=lena-1;i>-1;--i) { ajFmtPrintS(&outstr, "%6d ", i); for(j=0;jpmax) { pmax = path[k-1]; xpos = j; ypos = i; } ajStrAssignClear(m); ajStrAssignClear(n); p = ajSeqGetSeqC(a); q = ajSeqGetSeqC(b); while(xpos>=0 && ypos>=0) { if(!compass[ypos*lenb+xpos]) /* diagonal */ { ajStrAppendK(m,p[ypos--]); ajStrAppendK(n,q[xpos--]); if(ypos >= 0 && xpos>=0 && path[(ypos)*lenb+xpos]<=0.) break; continue; } else if(compass[ypos*lenb+xpos]==1) /* Left, gap(s) in vertical */ { score = path[ypos*lenb+xpos]; gapcnt = 0.; ix = xpos-1; while(1) { bimble = path[ypos*lenb+ix]-gapopen-(gapcnt*gapextend); if(!ix || fabs((double)score-(double)bimble)=pmax) { pmax = path[(lena-1)*lenb+i]; xpos = i; ypos = lena-1; } for(j=0;jpmax) { pmax = path[j*lenb+lenb-1]; xpos = lenb-1; ypos = j; } ajStrAssignClear(m); ajStrAssignClear(n); p = ajSeqGetSeqC(a); q = ajSeqGetSeqC(b); while(xpos>=0 && ypos>=0) { if(!compass[ypos*lenb+xpos]) /* diagonal */ { ajStrAppendK(m,p[ypos--]); ajStrAppendK(n,q[xpos--]); continue; } else if(compass[ypos*lenb+xpos]==1) /* Left, gap(s) in vertical */ { score = path[ypos*lenb+xpos]; gapcnt = 0.; ix = xpos-1; while(1) { bimble = path[ypos*lenb+ix]-gapopen-(gapcnt*gapextend); if(!ix || fabs((double)score-(double)bimble)< errbounds) break; --ix; ++gapcnt; } for(ic=0;ic<=gapcnt;++ic) { ajStrAppendK(m,'.'); ajStrAppendK(n,q[xpos--]); } continue; } else if(compass[ypos*lenb+xpos]==2) /* Down, gap(s) in horizontal */ { score = path[ypos*lenb+xpos]; gapcnt = 0.; iy = ypos-1; while(1) { bimble = path[iy*lenb+xpos]-gapopen-(gapcnt*gapextend); if(!iy || fabs((double)score-(double)bimble)< errbounds) break; --iy; ++gapcnt; } for(ic=0;ic<=gapcnt;++ic) { ajStrAppendK(m,p[ypos--]); ajStrAppendK(n,'.'); } continue; } else ajFatal("Walk Error in NW"); } *start2 = xpos+1; *start1 = ypos+1; ajStrReverse(m); /* written with append, need to reverse */ ajStrReverse(n); return; } /* @func embAlignPrintGlobal ************************************************** ** ** Print a global alignment ** Nucleotides or proteins as needed. ** ** @param [u] outf [AjPFile] output stream ** @param [r] a [const char *] complete first sequence ** @param [r] b [const char *] complete second sequence ** @param [r] m [const AjPStr] Walk alignment for first sequence ** @param [r] n [const AjPStr] Walk alignment for second sequence ** @param [r] start1 [ajint] start of alignment in first sequence ** @param [r] start2 [ajint] start of alignment in second sequence ** @param [r] score [float] alignment score from AlignScoreX ** @param [r] mark [AjBool] mark matches and conservatives ** @param [r] sub [float * const *] substitution matrix ** @param [r] cvt [const AjPSeqCvt] conversion table for matrix ** @param [r] namea [const char *] name of first sequence ** @param [r] nameb [const char *] name of second sequence ** @param [r] begina [ajint] first sequence offset ** @param [r] beginb [ajint] second sequence offset ** ** @return [void] ******************************************************************************/ void embAlignPrintGlobal(AjPFile outf, const char *a, const char *b, const AjPStr m, const AjPStr n, ajint start1, ajint start2, float score, AjBool mark, float * const *sub, const AjPSeqCvt cvt, const char *namea, const char *nameb, ajint begina, ajint beginb) { AjPStr fa; AjPStr fb; AjPStr fm; AjPStr ap; AjPStr bp; AjPStr mp; ajint i; ajint nc; ajint olen; const char *p; const char *q; const char *r = NULL; float match = 0.0; ajint apos; ajint bpos; ajint alen; ajint blen; ajint acnt; ajint bcnt; ajint aend; ajint bend; ajint len; ajint pos; fa = ajStrNewC(""); fb = ajStrNewC(""); fm = ajStrNewC(""); ap = ajStrNewC(""); bp = ajStrNewC(""); mp = ajStrNewC(""); if(start1>start2) { for(i=0;istart1) { for(i=0;i0.0) ajStrAppendC(&fm,":"); else ajStrAppendC(&fm," "); } } /* Set pointers to sequence remainders */ for(i=0,apos=start1,bpos=start2;iblen) { ajStrAppendC(&fa,&a[apos]); for(i=0;ialen) { ajStrAppendC(&fb,&b[bpos]); for(i=0;i0.0) ajStrAppendC(&fm,":"); else ajStrAppendC(&fm," "); } } /* Get start residues */ p = ajStrGetPtr(fa); q = ajStrGetPtr(fb); acnt = begina+start1; bcnt = beginb+start2; len = ajStrGetLen(fa); pos = 0; if(mark) r=ajStrGetPtr(fm); /* Add header stuff here */ ajFmtPrintF(outf,"Local: %s vs %s\n",namea,nameb); ajFmtPrintF(outf,"Score: %.2f\n\n",score); while(pos= 0) /* x (b) offset */ ymin = 0; else ymin = -offset; ymax = lenb + leftwidth-1 - offset; if(ymax > lena) ymax = lena; if(offset <= 0) /* y (a) offset) */ xmin = -leftwidth; else xmin = offset - leftwidth; xmax = lena + rightwidth-1 + offset; if(xmax > lenb) xmax = lenb; ajDebug("embAlignPathCalcSWFast\n"); ajDebug("lena:%d lenb:%d ymin:%d ymax:%d xmin:%d xmax:%d\n", lena, lenb, ymin, ymax, xmin,xmax); ajDebug("pathwidth:%d width:%d leftwidth:%d rightwidth:%d\n", pathwidth, width, leftwidth, rightwidth); ajDebug("a: '%s'\n", a); ajDebug("b: '%s'\n", b); ajDebug("FLT_MIN: %6.1f\n", -FLT_MAX); /* ajDebug("lena: %d lenb: %d width: %d pathwidth: %d\n", lena, lenb, width, pathwidth); */ /* ajDebug("a: '%10.10s .. %10.10s' %d\n", a, &a[jlena], lena); */ /* ajDebug("b: '%10.10s .. %10.10s' %d\n", b, &b[jlenb], lenb); */ /* Create stores for the maximum values in a row or column */ maxb = AJALLOC(lenb*sizeof(float)); /* First initialise the first column and row */ for(i=0;i= xmax) break; match = sub[ajSeqcvtGetCodeK(cvt,a[irow])] [ajSeqcvtGetCodeK(cvt,b[icol])]; if(match > 0.0) ajDebug("match %.1f irow:%d icol:%d i:%d ip:%d a:%c b:%c\n", match, irow, icol, i, ip, a[irow], b[icol]); /* Get diag score */ mscore = path[ip-width] + match; if(mscore < 0.0) mscore = 0.0; /* Set compass to diagonal value 0 */ path[ip] = mscore; if(i > 0) { maxa -= gapextend; fnew = path[ip-1]; fnew -= gapopen; if(fnew > maxa) maxa = fnew; if( maxa > mscore) { mscore = maxa; path[ip] = mscore; compass[ip] = 1; /* Score comes from left */ } } if(irow > 0) { if(i == width-1) { maxb[icol] = match-(gapopen); } else { maxb[icol] -= gapextend; fnew=path[ip-width]; fnew-=gapopen; if(fnew>maxb[icol]) maxb[icol] = fnew; } if(maxb[icol] > mscore) { mscore = maxb[icol]; path[ip] = mscore; compass[ip] = 2; /* Score comes from bottom */ } } if(mscore > ret) ret = mscore; } irow++; xmin++; } if(show) { max = -FLT_MAX; for(i=ymax;i>=ymin;--i) { ajFmtPrintS(&outstr, "%6d ", i); for(k=0;k max) max = path[i*width+j]; } ajDebug("%S\n", outstr); } ajFmtPrintS(&outstr, " "); for(k=0;k= 0) /* x (b) offset */ ymin = 0; else ymin = -offset; ymax = lenb + leftwidth-1 - offset; if(ymax > lena) ymax = lena; xmax = lena + rightwidth-1 + offset; if(xmax > lenb) xmax = lenb; /* Get maximum path score and save position */ pmax = -FLT_MAX; k=0; for(i=ymin;ipmax) { pmax = path[k-1]; xpos = j; ypos = i; } ajStrAssignClear(m); ajStrAssignClear(n); p = ajSeqGetSeqC(a); q = ajSeqGetSeqC(b); p += (*start1); q += (*start2); xpos2 = xpos+ypos-leftwidth; ajDebug("ypos:%d xpos:%d xpos2: %d start1:%d start2:%d\n", ypos, xpos, xpos2, *start1, *start2); while(xpos2>=0 && ypos>=0 && path[ypos*width+xpos] >0.) { ip = ypos*width+xpos; if(!compass[ip]) /* diagonal: xpos stays the same */ { ajDebug("comp:%d %c %c ypos:%d xpos:%d xpos2:%d path[%d]:%d\n", compass[ip], p[ypos], q[xpos2], ypos, xpos, xpos2, ip, path[ip]); ajStrAppendK(m,p[ypos--]); ajStrAppendK(n,q[xpos2--]); if(xpos2>=0 && ypos>=0 && path[ip-width]<=0.0) break; continue; } else if(compass[ip]==1) /* Left, horizontal gap(s): step through xpos */ { score = path[ip]; gapcnt = 0.; ix = xpos-1; while(1) { bimble=path[--ip]-gapopen-(gapcnt*gapextend); if(!ix || fabs((double)score-(double)bimble)1) { maxs[column] -= gapextend*pmatrix[row][GAPE]; fnew=path[row*seqlen+column-1]; fnew-=gapopen*pmatrix[row][GAPO]; if(fnew > maxs[column]) maxs[column] = fnew; if( maxs[column] > mscore) { mscore = maxs[column]; path[row*seqlen+column] = mscore; compass[row*seqlen+column] = 1; /* Score from left */ } } /* And then bimble down Y axis */ if(row>1) { maxp -= gapextend*pmatrix[row-1][GAPE]; fnew = path[(row-1)*seqlen+column]; fnew-=gapopen*pmatrix[row-1][GAPO]; if(fnew > maxp) maxp = fnew; if(maxp > mscore) { mscore = maxp; path[row*seqlen+column] = mscore; compass[row*seqlen+column] = 2; /* Score from bottom */ } } if(mscore < 0.0) { path[row*seqlen+column] = 0.0; compass[row*seqlen+column] = DIAG; mscore = 0.0; } if(mscore > ret) ret = mscore; ++column; } } if(show) { for(row=proflen-1;row>-1;--row) { ajStrDelStatic(&outstr); for(column=0;column pathmax) { pathmax = path[row*seqlen+column]; xpos = column; ypos = row; } ajDebug("score:%.3f xpos:%d/%d ypos:%d/%d\n", pathmax, xpos, seqlen, ypos, proflen); column = xpos; row = ypos; ajStrAssignClear(m); ajStrAssignClear(n); p = ajStrGetPtr(cons); q = ajStrGetPtr(seq); while(row>= 0 && column >= 0) { score = path[row*seqlen+column]; direction = compass[row*seqlen+column]; ajDebug("row:%d col:%d dir:%d\n", row, column, direction); if(direction == DIAG) { ajStrAppendK(m,p[row--]); ajStrAppendK(n,q[column--]); if(path[(row)*seqlen+(column)]<=0.) break; ajDebug("diagonal row:%d col:%d path:%.2f\n", row, column, path[(row)*seqlen+(column)]); continue; } else if(direction == LEFT) { gapcnt = 0; ix = column-1; while(1) { bimble = path[row*seqlen+ix] - pmatrix[row][GAPO]*gapopen - (gapcnt * pmatrix[row][GAPE]*gapextend); if(!ix || fabs((double)score-(double)bimble)0.0) ajStrAppendC(&fm,":"); else ajStrAppendC(&fm," "); } } /* Get start residues */ p = ajStrGetPtr(fa); q = ajStrGetPtr(fb); acnt = begina+start1; bcnt = beginb+start2; len = ajStrGetLen(fa); pos = 0; if(mark) r = ajStrGetPtr(fm); /* Add header stuff here */ ajFmtPrintF(outf,"Local: %s vs %s\n",namea,nameb); ajFmtPrintF(outf,"Score: %.2f\n\n",score); while(pos0.0) ++(*sim); } max = (lenm>lenn) ? lenm : lenn; *idx = *id / (float)max * (float)100.; *simx = *sim / (float)max * (float)100.; *id *= ((float)100. / (float)(olen-gaps)); *sim *= ((float)100. / (float)(olen-gaps)); ajStrDel(&fm); ajStrDel(&fn); return; } /* @func embAlignReportGlobal ************************************************* ** ** Print a global alignment ** Nucleotides or proteins as needed. ** ** @param [u] align [AjPAlign] Alignment object ** @param [r] seqa [const AjPSeq] Complete first sequence ** @param [r] seqb [const AjPSeq] Complete second sequence ** @param [r] m [const AjPStr] Walk alignment for first sequence ** @param [r] n [const AjPStr] Walk alignment for second sequence ** @param [r] start1 [ajint] start of alignment in first sequence ** @param [r] start2 [ajint] start of alignment in second sequence ** @param [r] gapopen [float] Gap open penalty to report ** @param [r] gapextend [float] Gap extend penalty to report ** @param [r] score [float] alignment score from AlignScoreX ** @param [u] matrix [AjPMatrixf] Floating point matrix ** @param [r] offset1 [ajint] first sequence offset ** @param [r] offset2 [ajint] second sequence offset ** ** @return [void] ******************************************************************************/ void embAlignReportGlobal(AjPAlign align, const AjPSeq seqa, const AjPSeq seqb, const AjPStr m, const AjPStr n, ajint start1, ajint start2, float gapopen, float gapextend, float score, AjPMatrixf matrix, ajint offset1, ajint offset2) { AjPSeq res1 = NULL; AjPSeq res2 = NULL; /* ajint end1; ajint end2; */ AjPStr fa = NULL; AjPStr fb = NULL; ajint maxlen; ajint i; ajint alen; ajint blen; ajint apos; ajint bpos; ajint nc; const char* a; const char* b; maxlen = AJMAX(ajStrGetLen(m), ajStrGetLen(n)); ajDebug("embAlignReportGlobal %d %d\n", start1, start2); ajDebug(" start1:%d start2:%d offset1:%d offset2:%d\n", start1, start2, offset1, offset2); a = ajSeqGetSeqC(seqa); b = ajSeqGetSeqC(seqb); /* generate the full aligned sequences */ ajStrSetRes(&fa, maxlen); ajStrSetRes(&fb, maxlen); /* pad the start of either sequence */ if(start1>start2) { for(i=0;istart2 start a: seqa 1..%d b: %d spaces seqb 1..%d\n", start1, nc, start1-nc); for(++nc;istart1) { for(i=0;iblen) { ajStrAppendC(&fa,&a[apos]); for(i=0;ialen) { ajStrAppendC(&fb,&b[bpos]); for(i=0;i