Correlation Program
13 February, 2015
This program allows you to enter as many data sets as you like; it then finds the correlation between every pair of them.
Here is a sample output:
Here is the code. This isn't a step-by-step tutorial, just a tutorial by example. If you have any questions, feel free to ask in the comments.
#include "stdafx.h"
#include <stdlib.h>
#include <cmath>
#include <iostream>
#include <sstream>
#include <string>
using namespace std;
// Capacity shared by the per-dataset sample arrays and by the global dataset table.
const int size = 1000;
// Number of variables (datasets) in the current round; written by get_variables().
int variables;
// Forward declarations for the functions defined later in this file.
void get_variables();    // prompts for the variable count and the variable names
void get_data();         // reads the sample values for each variable
void completedataset();  // runs get_mean, a and asqr for every variable
void get_mean(int i);    // stores the mean of dataset[i]
void a(int i);           // stores each sample's deviation from the mean
void asqr(int i);        // stores squared deviations and their sum
void correlate();        // prints one coefficient per ordered pair of variables
float a_times_b_sum(int i, int j);  // sum of products of deviations of dataset[i] and dataset[j]
class DATASET{
public:
int id;
string name;
int size_of;
float data[size];
float mean;
float a[size];
float asqr[size];
float asqrsum;
};
// Global table of datasets; the loops in this file use the first `variables`
// entries. The bound is spelled `::size` so it names the file-level constant
// and cannot be confused with std::size (visible through `using namespace std;`
// in C++17).
DATASET dataset[::size];
int _tmain(int argc, _TCHAR* argv[])
{
while (1)
{
get_variables();
get_data();
completedataset();
correlate();
}
return 0;
}
void get_variables()
{
string tempstring;
cout << "Number of variables: ";
cin >> tempstring;
if (!(istringstream(tempstring) >> variables).fail())
{
for (int i = 0; i < variables; i++)
{
cout << "Enter Variable " << i +1 << " Name:" << endl << ":";
cin >> dataset[i].name;
}
}
else
{
cout << "Invalid"<<endl;
}
}
void get_data()
{
string read;
bool next=false;
for (int i = 0; i < variables;i++)
{
dataset[i].size_of = 0;
next = false;
cout << "Variable: " << dataset[i].name<<endl;
cout << "Enter Data. Enter any letter to move to the next variable." << endl << ":";
for (int j = 0; next == false; j++)
{
cin >> read;
if (!(istringstream(read) >> dataset[i].data[j]).fail())
{
cout << ":";
dataset[i].size_of++;
}
else
{
cout << "Moving Along..."<<endl;
next = true;
}
}
}
}
// Derives the per-dataset statistics for every variable in use: the mean
// first, then the deviations from it, then the squared deviations and their sum.
void completedataset()
{
    int index = 0;
    while (index < variables)
    {
        get_mean(index);
        a(index);
        asqr(index);
        ++index;
    }
}
// Stores the arithmetic mean of dataset[i].data[0 .. size_of) in dataset[i].mean.
// An empty dataset gets a mean of 0 instead of the NaN that 0/0 would produce.
void get_mean(int i)
{
    const int count = dataset[i].size_of;
    if (count <= 0)
    {
        dataset[i].mean = 0;
        return;
    }
    float sum = 0;
    for (int j = 0; j < count; j++)
    {
        sum = sum + dataset[i].data[j];
    }
    dataset[i].mean = sum / count;
}
void a(int i)
{
for (int j = 0; j < dataset[i].size_of;j++)
{
dataset[i].a[j] = dataset[i].data[j] - dataset[i].mean;
}
}
void asqr(int i)
{
dataset[i].asqrsum = 0;
for (int j = 0; j < dataset[i].size_of; j++)
{
dataset[i].asqr[j] = dataset[i].a[j] * dataset[i].a[j];
dataset[i].asqrsum = dataset[i].asqrsum + dataset[i].asqr[j];
}
}
// Prints one line per ordered pair (i, j) of variables in the form
// "<name i> & <name j>: P=<coefficient>", where the coefficient is
//   sum(a_i * a_j) / sqrt(asqrsum_i * asqrsum_j).
// When the denominator is not positive (a dataset with no spread, or an empty
// one) the coefficient is not defined, so "undefined" is printed instead of
// dividing by zero.
void correlate()
{
    for (int i = 0; i < variables; i++)
    {
        for (int j = 0; j < variables; j++)
        {
            const float denominator = std::sqrt(dataset[i].asqrsum * dataset[j].asqrsum);
            cout << dataset[i].name << " & " << dataset[j].name << ": P=";
            // The comparison is also false for NaN, which routes it to "undefined".
            if (denominator > 0)
            {
                cout << a_times_b_sum(i, j) / denominator;
            }
            else
            {
                cout << "undefined";
            }
            cout << endl;
        }
    }
}
// Returns the sum of dataset[i].a[k] * dataset[j].a[k] over the indices that
// are valid in BOTH datasets.
//
// Only the common prefix is used: entries of a[] at or beyond a dataset's
// size_of were not written for the current round and may hold values left
// over from an earlier one, so they must not contribute to the sum.
float a_times_b_sum(int i , int j)
{
    const int shared = (dataset[i].size_of < dataset[j].size_of)
                           ? dataset[i].size_of
                           : dataset[j].size_of;
    float sum = 0;
    for (int k = 0; k < shared; k++)
    {
        sum = sum + (dataset[i].a[k] * dataset[j].a[k]);
    }
    return sum;
}