-
Notifications
You must be signed in to change notification settings - Fork 31
gnuplot_snippets
b-k edited this page Aug 30, 2014
·
4 revisions
This page provides a number of utilities to get Gnuplot to print interesting things.
###lattice plots This snippets produces a lattice of plots using a single input matrix.
static void printone(FILE *f, double width, double height, double margin, int xposn, int yposn, const apop_data *d); //see below
/* This produces a Gnuplot file that will produce an array of 2-D
plots, one for each pair of columns in the data set. Along the diagonal
is a plot of the variable against itself---a density plot of the variable.
\param d The data set whose (matrix) columns will be compared.
\param f The output pipe. E.g., fopen("lattice.gnuplot", "w") or popen("gnuplot --persist", "w")
*/
void apop_plot_lattice(const apop_data *d, FILE *f){
double width = 1.2,//feel free to make these variable inputs.
height = 1.2;
double margin = 0;
fprintf(f, "set size %g, %g\n"
"set rmargin 5\n"
"set lmargin -1\n"
"set tmargin 2.4\n"
"set bmargin -2\n"
"set origin %g, %g \n"
"set multiplot #layout %zu, %zu downwards \n"
"unset xtics; unset xlabel; unset ytics; unset ylabel\n"
"set nokey \n"
, width, height, margin,margin, d->matrix->size2, d->matrix->size2);
for (size_t i = 0; i< d->matrix->size2; i++)
for (size_t j = 0; j< d->matrix->size2; j++)
printone(f, width, height, margin, i, j, d);
fprintf(f, "unset multiplot\n");
}
/* the next function plots a single graph for the \ref apop_plot_lattice fn */
static void printone(FILE *f, double width, double height, double margin, int xposn, int yposn, const apop_data *d){
//pull two columns
size_t count = d->matrix->size2;
double nudge = 0.08;
gsl_vector v1 = gsl_matrix_column(d->matrix, xposn).vector;
gsl_vector v2 = gsl_matrix_column(d->matrix, yposn).vector;
gsl_matrix *m = gsl_matrix_alloc(d->matrix->size1, 2);
gsl_matrix_set_col(m, 0, &v1);
gsl_matrix_set_col(m, 1, &v2);
double sizex = (double)(width - margin * (count -1))/count;
double sizey = (double)(height - margin * (count -1))/count;
double offx = width - (sizex +margin-nudge)* (count - xposn) - nudge*count;
double offy = height - (sizey +margin-nudge)* (1 + yposn) - nudge*count;
fprintf(f, "unset y2tics; unset y2label; unset ytics; unset ylabel\n");
fprintf(f, "unset x2tics; unset x2label; unset xtics; unset xlabel\n");
fprintf(f, "set size %g, %g\n"
"set origin %g, %g\n"
, sizex, sizey, offx, offy);
//Gnuplot has no way to just set a label with no plot, so
//labels have to be drawn with a long offset from one of the
//plots we do display.
if (xposn == yposn+1)
fprintf(f, "set label %i '%s' center at graph %g, %g\n",yposn+1, (d->names->colct >yposn)? d->names->col[yposn]: "", -0.5, 0.5);
if ((yposn == count-1) && (xposn == count-2))
fprintf(f, "set label %zu '%s' center at graph %g, %g\n",count, (d->names->colct >count -1)? d->names->col[count -1]: "", 1.5, 0.5);
if (xposn != yposn){
fprintf(f, "plot '-'\n");
fflush(f);
apop_matrix_print(m, NULL, f, 'p');
fprintf(f,"e\n");
gsl_matrix_free(m);
}
if (xposn == yposn+1)
fprintf(f, "unset label %i \n",yposn+1);
if ((yposn == count-1) && (xposn == count-2))
fprintf(f, "unset label %zu\n",count);
}
###triangle plots Here is a triangle plot, where points are placed based on the relative weight of three variables.
/** This produces a nifty triangle plot from an input matrix with three
columns. Each row is plotted inside an equilateral triangle such that
(1, 0, 0) is the lower left corner, (0, 1, 0) the lower right corner,
and (0, 0, 1) the middle, upper corner.
\image html "triangle.png" "A triangle plot with a smattering of data points."
\li Gnuplot will rescale for you, so you don't need to worry about whether the row sums to one.
\li See \ref apop_prep_output for more on how printing settings are set.
\li See also the legible output section of the \ref outline for more details and examples.
\li This function uses the \ref designated syntax for inputs.
*/
void apop_plot_triangle(apop_data *in, FILE *f){
Apop_stopif(!in, return, 0, "You sent me a NULL data set.");
if (in->names && in->names->colct>=3){
fprintf(f, "set label '%s' at -0.03, 0 right; \n", in->names->col[0]);
fprintf(f, "set label '%s' at 1.03, 0 left; \n", in->names->col[1]);
fprintf(f, "set label '%s' at 0.5, 1/sqrt(2)+0.05 center; \n", in->names->col[2]);
}
fprintf(f,
" set size square; \n"
" set noborder; set nogrid; \n"
" set noxtics; set noytics; \n"
" set nokey; \n"
" set lmargin 10; \n"
" set bmargin 3; \n"
" set tmargin 5; \n"
" set arrow 1 from 0,0 to 1,0 nohead \n"
" set arrow 2 from 0.5,1/sqrt(2) to 1,0 nohead \n"
" set arrow 3 from 0.5,1/sqrt(2) to 0,0 nohead \n"
" unset title \n"
" set xrange [-.1:1.1] \n"
" set yrange [-.1:0.81] \n"
" plot '-' using (($2 + $3/2)/($1+$2+$3)):($3/($1+$2+$3)/sqrt(2)) with points; \n"
);
gsl_matrix *triplets = Apop_subm(in->matrix, 0,0, in->matrix->size1,3);
apop_matrix_print(triplets, .output_pipe=f, .output_type='p');
}
This snippet plots a scatterplot with a regression line through it, to make blatant a relationsip between the two variables. After the snippet, there is a sample program using it.
/** Prep for Gnuplot one of those cute scatterplots with a regression line through it.
Currently, you only get two dimensions.
It appends instead of overwriting, so you can prep the file if you want; see sample code. [to overwrite a file, just remove it first with the standard C function <tt>remove("filename");</tt>]
\param data This is a copy of what you'd sent to the regression fn. That is, the first column is the dependent variable and the second is the independent. That is, what will be on Y axis is the <i>first</i> column, and what is on the X axis is the second. Custom for regressions and custom for graphs just clash on this one.
\param est The apop_model structure your regression function gave
you. (if NULL, I'll estimate an OLS model for you).
The sample program below will pull data from a database (ridership at
the Silver Spring, MD Metro station; get the database in the {\em Modeling
with Data} sample code, at http://modelingwithdata.org/appendices.html), then runs OLS, and produce
a Gnuplot file to write to a file named "scatter.eps". You can run the
result through Gnuplot via <tt> gnuplot scatter.gplot</tt>, and if you
don't like the results, you have the Gnuplot file ("scatter.gplot") on
hand for modifications.
*/
void apop_plot_line_and_scatter(apop_data *data, apop_model *est, FILE *f){
int newmodel=0;
if (!est) {
newmodel++;
est = apop_estimate(data, apop_ols);
}
char exdelimiter[100];
fprintf(f, "f(x) = %g + %g * x\n", gsl_vector_get(est->parameters->vector,0), gsl_vector_get(est->parameters->vector,1));
if (data->names){
fprintf(f, "set xlabel \"%s\"\n", data->names->col[1]);
fprintf(f, "set xlabel \"%s\"\n", data->names->vector);
}
fprintf(f, "plot \"-\" using 2:1 , f(x) with lines;\n");
//force the delimiter to be a comma space; don't tell the user.
strcpy(exdelimiter, apop_opts.output_delimiter);
strcpy(apop_opts.output_delimiter, ", ");
apop_matrix_print(data->matrix, output_name, output_pipe, output_type, 'a');
strcpy(apop_opts.output_delimiter, exdelimiter);
if (newmodel) apop_model_free(est);
}
Sample program:
#include <apop.h>
int main(){
char outfile[] = "scatter.gplot";
apop_db_open("data-metro.db");
apop_data *data = apop_query_to_data("select riders, year from riders where station like 'Silver%%' and riders>0");
apop_db_close();
//The regression destroys your data, so copy it first.
apop_data *data_copy = apop_data_copy(data);
//Run OLS, display results on terminal
apop_model *est = apop_estimate(data, apop_OLS);
apop_model_show(est);
//Prep the file with a header, then call the function.
FILE *f = fopen(outfile, "w");
fprintf(f,"set term postscript;\n set output \"scatter.eps\"\n set yrange [0:*]\n");
apop_plot_line_and_scatter(data_copy, est, .output_pipe=f);
fclose(f);
}
###Q-Q plots
/** Plot the percentiles of a data set against the percentiles of a distribution.
The distribution percentiles will be on the $x$-axis, your data percentiles on the $y$-.
\param v The data (No default, must not be \c NULL.)
\param m The distribution, such as apop_normal. I'll be using the \c draw method. (Default = best-fitting Normal)
\param bins The number of bins in the histogram. The number of points on the plot will always be 101 (i.e. percentiles). (default = MAX(10, data->size/10); denominator subject to future adjustment)
\param r A \c gsl_rng.
*/
void apop_plot_qq(gsl_vector *v, apop_model *m, FILE *f, size_t bins, gsl_rng *r){
Apop_stopif(!v, return, 0, "Input vector is NULL.");
int free_m = 0;
if (!m){
free_m++;
apop_data *d = apop_vector_to_data(v);
m = apop_estimate(d, apop_normal);
d->vector = NULL;
apop_data_free(d);
}
if (!bins) bins = GSL_MAX(10, v->size/10);
double *pctdata = apop_vector_percentiles(v, 'a');
//produce percentiles from the model via RNG.
gsl_vector *vd = gsl_vector_alloc(bins);
for(int i=0; i< bins; i++)
m->draw(gsl_vector_ptr(vd, i), r, m);
double *pctdist = apop_vector_percentiles(vd, 'a');
fprintf(f, "set key off; set size square;\n"
"plot x;\n"
"replot '-' with points\n");
for (int i=0; i < 101; i++)
fprintf(f, "%g\t %g\n", pctdist[i], pctdata[i]);
fprintf(f, "e\n");
gsl_vector_free(vd);
if (free_m) apop_model_free(m);
}