COVID-Challenge/main.cpp
2021-06-02 19:29:56 -05:00

192 lines
4.9 KiB
C++

#include <bits/stdc++.h>
using namespace std;
using ll = long long;
string P[3] = {
"ATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCA",
"TTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTC",
"TCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCT" };
double score[3][7] = {{449.7,449.7,449.7,449.7,437.7,161,147.35},
{450,435,450,449.7,444,180,139.8},
{449.4,449.1,404.7,450,449.7,147,148}
};
vector<string> vrt[7];
ll levenshtein(string &s, string &t){
int n = s.size(), m = t.size();
ll dp[n + 1][m + 1]; //goal to be low as possible
for(int i = 0; i <= n; i++)
for(int j = 0; j <= m; j++){
ll &dpc = dp[i][j] = (i || j) * n * m;
if(i) dpc = min(dpc, dp[i - 1][j] + 1); //add char s
if(j) dpc = min(dpc, dp[i][j - 1] + 1); //add char t
if(i && j) dpc = min(dpc, dp[i - 1][j - 1] + (s[i - 1] != t[j - 1])); //replace or equal
}
return dp[n][m];
}
ll smithWaterman(string &s, string &t){
int n = s.size(), m = t.size(), x = 0, y = 0;
ll dp[n + 1][m + 1]; //goal to be high as possible
for(int i = 0; i <= n; i++)
for(int j = 0; j <= m; j++){
ll &dpc = dp[i][j] = 0;
if(i) dpc = max(dpc, dp[i - 1][j] - 2); //add char s
if(j) dpc = max(dpc, dp[i][j - 1] - 2); //add char t
if(i && j) dpc = max(dpc, dp[i - 1][j - 1] + 3 * (2 * (s[i - 1] == t[j - 1]) - 1)); //test equality
if(dpc > dp[x][y]) x = i, y = j;
}
return dp[x][y];
}
//change return to switch out objective
ll sequenceCmp(string s, string t){
return smithWaterman(s, t);
}
const int k = 21;
bitset<k> test(string & probe) {
bitset<k> bs;
double avg[7];
for (int i = 0; i < 7; ++i) {
avg[i] = 0;
for (int j = 0; j < 20; ++j) {
avg[i] += sequenceCmp(probe, vrt[i][j]);
}
avg[i] /= 20;
cout << avg[i] << ' ';
}
cout << '\n';
int cnt = 0;
for (int i = 0; i < 7; ++i) {
for (int j = i+1; j < 7; ++j) {
if (abs(avg[i]-avg[j]) > 10) bs[cnt] = 1;
cnt++;
cout << avg[i] << ' ' << avg[j] << ' ' << bs[cnt-1] << '\n';
}
}
cout << bs << '\n';
return bs;
}
vector<string> dp[4][1 << k];
void analyze_spike_sequences(){
int cnt = 0;
for (int i = 0; i < 7; ++i) {
for (int j = 0; j <= vrt[i][0].size()-150; ++j) {
string probe(begin(vrt[i][0])+j, begin(vrt[i][0])+j+150);
bitset<k> bs = test(probe);
cout << probe << ' ' << cnt++ << endl;
int bsint = 0; //change to int
for(int i = 0; i < k; i++) bsint |= bs[i] << i;
//add to dp knapack fashion
for(int i = 2; ~i; i--)
for(int j = 0; j < (1 << k); j++){
if(dp[i][j].size() == i && (i || !j)){
int x = i + 1, y = j | bsint;
if(dp[x][y].empty()){
dp[x][y] = dp[i][j];
dp[x][y].push_back(probe);
}
}
}
}
}
int retx = 0, rety = 0;
//find best dp coverage
for(int i = 3; i <= 3; i++)
for(int j = 0; j < (1 << k); j++){
if(!dp[i][j].empty() && __builtin_popcount(j) > __builtin_popcount(rety)){
retx = i, rety = j;
}
}
cout << retx << " " << rety << endl;
for(string probe : dp[retx][rety]){
bitset<k> bs = test(probe);
for(int i = 0; i < k; i++) cout << bs[i];
cout << endl;
cout << probe << endl;
}
}
void mkscores() {
cout << '{';
for (int i = 0; i < 3; ++i) {
cout << "{";
for (int j = 0; j < 7; ++j) {
score[i][j] = 0;
for (int k = 0; k < 20; ++k)
score[i][j] += sequenceCmp(P[i], vrt[j][k]);
score[i][j] /= 20;
cout << score[i][j];
if (j < 6) cout << ",";
}
cout <<"}";
if (i < 2) cout << ",";
cout << '\n';
}
cout << '}';
}
// classify a string
int solve(string & s) {
double val[3];
for (int i = 0; i < 3; ++i) val[i] = sequenceCmp(P[i], s);
double diff[7];
for (int i = 0; i < 7; ++i) {
diff[i] = 0;
for (int j = 0; j < 3; ++j) diff[i] += pow(val[j]-score[j][i], 2);
}
return min_element(diff, diff+7)-diff;
}
string types[7] = {"original",
"B.1.1.7",
"B.1.351",
"B.1.427",
"P.1",
"not_sars_cov2",
"not_sars_cov2"
};
void answer(){
for(int i = 0; i < 100;i++){
string inp; cin >> inp;
cout << types[solve(inp)] << '\n';
}
}
int main(){
ios::sync_with_stdio(0);
cin.tie(0);
srand(time(0));
/*if (fopen("input.txt", "r")) {
freopen("input.txt", "r", stdin);
answer();
return 0;
}*/
if (fopen("spikesequences.txt", "r")) {
freopen("spikesequences.txt", "r", stdin);
for (int i = 0; i < 7; ++i) { // 7 vrts
for (int j = 0; j < 20; ++j) {
string s, t; cin >> s >> t;
vrt[i].push_back(t);
}
}
// if (P[0] != "") mkscores();
// else
analyze_spike_sequences();
return 0;
}
answer();
return 0;
}