184 lines
4.7 KiB
C++
184 lines
4.7 KiB
C++
#include <bits/stdc++.h>
|
|
using namespace std;
|
|
using ll = long long;
|
|
|
|
string P[3] = {
|
|
"ATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCA",
|
|
"CCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTC",
|
|
"TTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATC" };
|
|
double score[3][7] = { {449.7,449.7,449.7,449.7,437.7,161,147.35},
|
|
{449.7,449.7,449.4,449.7,437.7,158,149},
|
|
{449.7,449.4,404.4,450,449.7,172,146.85} };
|
|
|
|
vector<string> vrt[7];
|
|
|
|
ll levenshtein(string &s, string &t){
|
|
int n = s.size(), m = t.size();
|
|
ll dp[n + 1][m + 1]; //goal to be low as possible
|
|
for(int i = 0; i <= n; i++)
|
|
for(int j = 0; j <= m; j++){
|
|
ll &dpc = dp[i][j] = (i || j) * n * m;
|
|
if(i) dpc = min(dpc, dp[i - 1][j] + 1); //add char s
|
|
if(j) dpc = min(dpc, dp[i][j - 1] + 1); //add char t
|
|
if(i && j) dpc = min(dpc, dp[i - 1][j - 1] + (s[i - 1] != t[j - 1])); //replace or equal
|
|
}
|
|
return dp[n][m];
|
|
}
|
|
|
|
ll smithWaterman(string &s, string &t){
|
|
int n = s.size(), m = t.size(), x = 0, y = 0;
|
|
ll dp[n + 1][m + 1]; //goal to be high as possible
|
|
for(int i = 0; i <= n; i++)
|
|
for(int j = 0; j <= m; j++){
|
|
ll &dpc = dp[i][j] = 0;
|
|
if(i) dpc = max(dpc, dp[i - 1][j] - 2); //add char s
|
|
if(j) dpc = max(dpc, dp[i][j - 1] - 2); //add char t
|
|
if(i && j) dpc = max(dpc, dp[i - 1][j - 1] + 3 * (2 * (s[i - 1] == t[j - 1]) - 1)); //test equality
|
|
if(dpc > dp[x][y]) x = i, y = j;
|
|
}
|
|
return dp[x][y];
|
|
}
|
|
|
|
//change return to switch out objective
|
|
ll sequenceCmp(string s, string t){
|
|
return smithWaterman(s, t);
|
|
}
|
|
|
|
const int k = 21;
|
|
bitset<k> test(string & probe) {
|
|
bitset<k> bs;
|
|
double avg[7];
|
|
for (int i = 0; i < 7; ++i) {
|
|
for (int j = 0; j < 10; ++j) {
|
|
int r = rand()%20;
|
|
avg[i] += sequenceCmp(probe, vrt[i][r]);
|
|
}
|
|
avg[i] /= 2;
|
|
}
|
|
int cnt = 0;
|
|
for (int i = 0; i < 7; ++i)
|
|
for (int j = i+1; j < 7; ++j)
|
|
{
|
|
if (abs(avg[i]-avg[j]) > 5) bs[cnt] = 1;
|
|
cnt++;
|
|
}
|
|
return bs;
|
|
}
|
|
|
|
vector<string> dp[4][1 << k];
|
|
void analyze_spike_sequences(){
|
|
int cnt = 0;
|
|
for (int i = 0; i < 7; ++i) {
|
|
for (int j = 0; j <= vrt[i][0].size()-150; j += rand()%100) {
|
|
string probe(begin(vrt[i][0])+j, begin(vrt[i][0])+j+150);
|
|
bitset<k> bs = test(probe);
|
|
cout << probe << ' ' << cnt++ << endl;
|
|
|
|
int bsint = 0; //change to int
|
|
for(int i = 0; i < k; i++) bsint |= bs[i] << i;
|
|
|
|
//add to dp knapack fashion
|
|
for(int i = 2; ~i; i--)
|
|
for(int j = 0; j < (1 << k); j++){
|
|
if(dp[i][j].size() == i && (i || !j)){
|
|
int x = i + 1, y = j | bsint;
|
|
if(dp[x][y].empty()){
|
|
dp[x][y] = dp[i][j];
|
|
dp[x][y].push_back(probe);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
int retx = 0, rety = 0;
|
|
//find best dp coverage
|
|
for(int i = 3; i <= 3; i++)
|
|
for(int j = 0; j < (1 << k); j++){
|
|
if(!dp[i][j].empty() && __builtin_popcount(j) > __builtin_popcount(rety)){
|
|
retx = i, rety = j;
|
|
}
|
|
}
|
|
|
|
cout << retx << " " << rety << endl;
|
|
for(string probe : dp[retx][rety]){
|
|
bitset<k> bs = test(probe);
|
|
for(int i = 0; i < k; i++) cout << bs[i];
|
|
cout << endl;
|
|
cout << probe << endl;
|
|
}
|
|
}
|
|
|
|
void mkscores() {
|
|
cout << '{';
|
|
for (int i = 0; i < 3; ++i) {
|
|
cout << "{";
|
|
for (int j = 0; j < 7; ++j) {
|
|
score[i][j] = 0;
|
|
for (int k = 0; k < 20; ++k)
|
|
score[i][j] += sequenceCmp(P[i], vrt[j][k]);
|
|
score[i][j] /= 20;
|
|
cout << score[i][j];
|
|
if (j < 6) cout << ",";
|
|
}
|
|
cout <<"}";
|
|
if (i < 2) cout << ",";
|
|
cout << '\n';
|
|
}
|
|
cout << '}';
|
|
}
|
|
|
|
// classify a string
|
|
int solve(string & s) {
|
|
double val[3];
|
|
for (int i = 0; i < 3; ++i) val[i] = sequenceCmp(P[i], s);
|
|
double diff[7];
|
|
for (int i = 0; i < 7; ++i) {
|
|
diff[i] = 0;
|
|
for (int j = 0; j < 3; ++j) diff[i] += pow(val[j]-score[j][i], 2);
|
|
}
|
|
return min_element(diff, diff+7)-diff;
|
|
}
|
|
|
|
string types[7] = {"original",
|
|
"B.1.1.7",
|
|
"B.1.351",
|
|
"B.1.427",
|
|
"P.1",
|
|
"not_sars_cov2",
|
|
"not_sars_cov2"
|
|
};
|
|
|
|
void answer(){
|
|
for(int i = 0; i < 100;i++){
|
|
string inp; cin >> inp;
|
|
cout << types[solve(inp)] << '\n';
|
|
}
|
|
}
|
|
|
|
|
|
|
|
int main(){
|
|
ios::sync_with_stdio(0);
|
|
cin.tie(0);
|
|
if (fopen("input.txt", "r")) {
|
|
freopen("input.txt", "r", stdin);
|
|
answer();
|
|
return 0;
|
|
}
|
|
if (fopen("spikesequences.txt", "r")) {
|
|
freopen("spikesequences.txt", "r", stdin);
|
|
for (int i = 0; i < 7; ++i) { // 7 vrts
|
|
for (int j = 0; j < 20; ++j) {
|
|
string s, t; cin >> s >> t;
|
|
vrt[i].push_back(t);
|
|
}
|
|
}
|
|
if (P[0] != "") mkscores();
|
|
else analyze_spike_sequences();
|
|
return 0;
|
|
}
|
|
answer();
|
|
return 0;
|
|
}
|