Calculation of native contacts and preparation of corresponding diagrams

Recent versions of carma can calculate native contacts. The assumption is that the first frame of the trajectory is the 'native' structure (use catdcd to do this), and all calculations are performed with respect to this (first) frame. To perform the calculation using only Ca atoms try something like this :


carma -v -w -col -segid A -Qfract 8.0 3 combined.dcd my.psf

The important flag is Qfract and the two numbers after it are (i) the distance cutoff (in Angstrom) for the contacts, and, (ii) the separation (in number of residues) required for a contact. If the second number is 3, then the difference in residue numbers must be greater (but not equal) to 3.

If you want to complicate things (and can wait quite a bit longer), you can use atom selections as usual :


carma -v -w -col -segid A -atmid HEAVY -Qfract 5.0 3 combined.dcd my.psf


Carma will create a file named carma.Qfraction.dat containing three similarity measures. These are Q, Qs and q (in this order) as defined in the supplementary material of this paper with the difference that for the calculation of q the residue separation (given in the command line) is being applied.



Assuming you have a DCD with the native structure as a first frame, you can prepare a diagram for 'q vs Rg' with something like this :


#
# Prepare carma.Rgyration.dat
#
carma -v -Rg -atmid HEAVY combined.dcd my.psf

#
# Prepare carma.Qfraction.dat
#
carma -v -w -col -segid A -Qfract 8 2 combined.dcd my.psf

#
# Merge selected columns
#
paste carma.Rgyration.dat carma.Qfraction.dat | awk '{print $2 " " $6}' > Rg_vs_q.dat

#
# Prepare matrix (see at the end of this page for source code)
#
2matrix -50 Rg_vs_q.dat

#
# Plot it ...
#
plot -cc < grid.matrix
plot -cc < grid_log.matrix



Giving (after rotation) something like this :



If instead of Rg you want to use, say, the first principal component from dihedral PCA, you could proceed like this :


#
# Do dPCA
#
carma -v -w -color -segid A -dPCA 5 3 320 combined.dcd my.psf

#
# Prepare carma.Qfraction.dat
#
carma -v -w -col -segid A -Qfract 8 2 combined.dcd my.psf

#
# Merge selected columns
#
paste carma.dPCA.fluctuations.dat carma.Qfraction.dat | awk '{print $2,$10}' > PC1_vs_q.dat

#
# Prepare matrix (see at the end of this page for source code)
#
2matrix -50 PC1_vs_q.dat

#
# Plot it ...
#
plot -cc < grid.matrix
plot -cc < grid_log.matrix



Giving (after rotation) something like this (for the first component) …


… or like this (for the second component) …



More complex diagrams are also feasible. Imagine mapping on the PC1-PC2 distribution the corresponding average q values. Here is an example (log density of PC1-PC2 on the left panel, average q values for corresponding structures to the right, red is high) :


Rendering the matrix with dx to show-off (guide for loading data to dx) :


… and another matrix for completeness :


Here is another example. One axis is Rg, the other axis is PC1, and the colors correspond to similarity to the native structure (q) (dark red → high similarity, dark blue → low) :



Source code for the program '2matrix' :

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
 
#define	nearint(a)	((a) < 0.0 ? (int)(a-0.50) : (int)(a+0.50))
#define	MAXGRID	1000
 
int main(argc,argv)
int     argc;
char    *argv[];
{
	FILE	*in;
	FILE	*out;
	char	line[100000];
	float	v1, v2, v3;
	int	c, cc;
	double	minx, maxx, miny, maxy;
	int	N;
	int	grid;
	unsigned int	data[MAXGRID+1][MAXGRID+1];
	double		dens[MAXGRID+1][MAXGRID+1];
	int	u, v;
 
 
	if ( argc !=3 || sscanf( argv[1], "%d", &grid ) != 1 )
		{
			printf("Usage : 2matrix -GRID filename\n");
			exit(1);
		}
 
	if ( grid < 0 )
		grid *= -1;
 
	grid -= 1;
 
	if ( grid < 2 )
		{
			printf("Selected number of grid points doesn't make sense.\n");
			exit(1);
		}
 
	if ( grid > MAXGRID )
		{
			printf("Sorry. Increase MAXGRID and re-compile.\n");
			exit(1);
		}
 
	in = fopen( argv[2], "r" );
	if ( in == NULL )
		{
			printf("Can not open file %s for reading.\n");
			exit(1);
		}
 
	out = fopen( "grid.matrix", "w" );
	if ( out == NULL )
		{
			printf("Can not open output file for writing.\n");
			exit(1);
		}
 
	N = 0;
	printf("First pass to determine limits ...\n");
	while( fgets( line, 99999, in) != NULL )
		{
			if ( (c = sscanf( line, "%f %f %f", &v1, &v2, &v3)) != 2 && c != 3 )
				{
					printf("\nFailed to read data from line : %s\n", line );
					exit(1);
				}
			if ( N == 0 )
				{
					cc = c;
					minx = v1;
					maxx = v1;
					miny = v2;
					maxy = v2;
					printf("Now reading line %9d", N );
 
				}		
			if ( v1 > maxx )
				maxx = v1;
			if ( v1 < minx )
				minx = v1;
			if ( v2 > maxy )
				maxy = v2;
			if ( v2 < miny )
				miny = v2;
 
			if ( c != cc )
				{
					printf("\nNumber of columns not constant : %s\n", line );
					exit(1);
				}
 
 
			N++;
			if ( N % 100 == 0 )
				printf("HHHHHHHHH%9d", N );	/* These are backspaces */
 
		}
 
	printf("HHHHHHHHH%9d\n", N );
 
	rewind( in );
	printf("Read %d lines.\n", N );
 
	if ( cc == 3 )
		printf("Three columns found. Using the third to calculate sums.\n");
 
	printf("Limits are :\n");
	printf("	on x : %f to %f\n", minx, maxx );
	printf("	on y : %f to %f\n", miny, maxy );
 
	for ( u=0 ; u <= grid ; u++ )
	for ( v=0 ; v <= grid ; v++ )
		{
			data[u][v] = 0;	
			dens[u][v] = 0.0l;
		}
 
	printf("\nSecond pass to interpolate on a %d by %d grid ...\n", grid+1, grid+1 );
 
	printf("Now reading line %9d", 0 );
	N=0;
	while( fgets( line, 99999, in) != NULL )
		{
 
			if ( ( c = sscanf( line, "%f %f %f", &v1, &v2, &v3)) != 2 && c != 3 || c != cc )
				{
					printf("\nFailed to read data from line : %s\n", line );
					exit(1);
				}
 
			u =  (int)(((grid * v1 - grid * minx) / ( maxx - minx )) + 0.50);
			v =  (int)(((grid * v2 - grid * miny) / ( maxy - miny )) + 0.50);
 
			data[u][v]++;
			dens[u][v] += (double)(v3);
 
			N++;
			if ( N % 100 == 0 )
				printf("HHHHHHHHH%9d", N );
 
		}
 
	printf("HHHHHHHHH%9d\n", N );
 
	fclose( in );
 
 
 
	printf("Writing output in grid.matrix.\n");
	for ( u=0 ; u <= grid ; u++ )
	{
	for ( v=0 ; v <= grid ; v++ )
		if ( cc == 2 )
			fprintf(out, "%8.7e ", (float)(data[u][v]) );
		else
			{
				if ( data[u][v] >= 1 )
					fprintf(out, "%8.7e ", dens[u][v] / data[u][v] );
				else
					fprintf(out, "%8.7e ", 0.0l );
			}
	fprintf(out, "\n");
	}
 
	fclose( out );
 
	out = fopen( "grid_log.matrix", "w" );
	if ( out == NULL )
		{
			printf("Can not open output file for writing.\n");
			exit(1);
		}
	printf("Writing logarithms in grid_log.matrix.\n");
	for ( u=0 ; u <= grid ; u++ )
	{
	for ( v=0 ; v <= grid ; v++ )
		{
			if ( cc == 2 )
			{
			if ( data[u][v] > 1 )
				fprintf(out, "%8.7e ", log( data[u][v] ) );
			else 
				fprintf(out, "%8.7e ", 0.0l );
			}
			else
			{
			if ( data[u][v] == 0 )
				fprintf(out, "%8.7e ", 0.0l );	
			else if ( (dens[u][v] / data[u][v]) > 0.0 )
				fprintf(out, "%8.7e ", log( dens[u][v] / data[u][v])  );
			else
				{
					printf("\nNegative values present. Giving-up on writing logs.\n");
					fclose( out );
					system("/bin/rm grid_log.matrix");
					exit( 1 );
				}
			}
		}
	fprintf(out, "\n");
	}
 
	fclose( out );
 
 
 
	printf("All done.\n", N );
 
}
 
 
 
 
research/howto/preparation_of_diagrams_based_on_native_contacts_q_qs_s.txt · Last modified: 2012/02/05 18:26 by glykos