#!C:/STRAWB~1/perl/bin/perl.exe -w
#
# Translate a DNA or RNA sequence into a protein sequence.
#
# The user enters either the name of a file holding the sequence or the
# sequence itself.  The sequence is validated, transcribed to RNA, cut into
# codons (triplets) and translated to one-letter amino-acid codes.  The
# resulting protein and the relative frequency of every amino acid are
# written to the file named in $proteinfile.
use strict;
use warnings;

my $proteinfile = "Protein";

print "Enter a file containing DNA or RNA:\n\n";
my $DNA = <STDIN>;
$DNA = '' unless defined $DNA;    # EOF on STDIN: treat as empty input
chomp $DNA;

if (-f $DNA) {
    # The input names an existing file: slurp its whole content.
    open(my $dna_fh, '<', $DNA) or die "ERROR open File: $DNA ($!)\n";
    local $/ = undef;
    $DNA = <$dna_fh>;
    close $dna_fh;
    $DNA = '' unless defined $DNA;
}
else {
    print "File not found\n";
    print "using \"$DNA\" as sequence \n\n";
}

# Drop all whitespace (line breaks inside the file, blanks) and normalise
# the case so that lowercase sequences are accepted as well.
$DNA =~ s/\s//g;
$DNA = uc $DNA;

# Check whether the input is DNA or RNA by counting nucleotide letters.
# The RNA count is the number of U plus all the other bases; if neither
# count equals the sequence length, at least one character is not a
# nucleotide.
my $i_DNA = ($DNA =~ tr/ACGT//);
my $i_RNA = $i_DNA + ($DNA =~ tr/U//);

if ($i_DNA == length $DNA) {
    print "Sequence successfully identified as DNA\n\n";
}
elsif ($i_RNA == length $DNA) {
    print "Sequence succesfully identified as RNA\n\n";
}
else {
    print "Sequence is neither DNA nor RNA\n\n";
    print "cannot continue!\n\n";
    exit();
}

# Transcribe DNA into RNA (T -> U); an RNA input is left unchanged.
(my $RNA = $DNA) =~ tr/T/U/;

# Cut the RNA into codons.  A trailing incomplete codon (1 or 2 bases) is
# ignored because only int(length / 3) triplets are unpacked.
my @triplets = unpack("A3" x int(length($RNA) / 3), $RNA);

# Standard genetic code: [ name, one-letter code, codons... ].
my @codon_table = (
    [ ala  => 'A',      qw(GCU GCC GCA GCG) ],
    [ cys  => 'C',      qw(UGU UGC) ],
    [ asp  => 'D',      qw(GAU GAC) ],
    [ glu  => 'E',      qw(GAA GAG) ],
    [ phe  => 'F',      qw(UUU UUC) ],
    [ gly  => 'G',      qw(GGU GGC GGA GGG) ],
    [ his  => 'H',      qw(CAU CAC) ],
    [ ile  => 'I',      qw(AUU AUC AUA) ],
    [ lys  => 'K',      qw(AAA AAG) ],
    [ leu  => 'L',      qw(CUU CUC CUA CUG UUA UUG) ],
    [ met  => 'M',      qw(AUG) ],
    [ asn  => 'N',      qw(AAU AAC) ],
    [ pro  => 'P',      qw(CCU CCC CCA CCG) ],
    [ gln  => 'Q',      qw(CAA CAG) ],
    [ arg  => 'R',      qw(CGU CGC CGA CGG AGA AGG) ],
    [ ser  => 'S',      qw(AGU AGC UCU UCC UCA UCG) ],
    [ thr  => 'T',      qw(ACU ACC ACA ACG) ],
    [ val  => 'V',      qw(GUU GUC GUA GUG) ],
    [ trp  => 'W',      qw(UGG) ],
    [ tyr  => 'Y',      qw(UAU UAC) ],
    [ stop => '!STOP!', qw(UAA UAG UGA) ],
);

# %CODONS maps an amino-acid name to its record (letter, count, percent);
# %TRIPLETS maps every codon to the shared record of its amino acid.
my %CODONS   = ();
my %TRIPLETS = ();
for my $entry (@codon_table) {
    my ($name, $letter, @codons) = @{$entry};
    $CODONS{$name} = { letter => $letter, count => 0 };
    $TRIPLETS{$_} = $CODONS{$name} for @codons;
}

# Replace every triplet by the one-letter code of its amino acid and count
# how often each amino acid occurs.
for my $trip (@triplets) {
    my $amino = $TRIPLETS{$trip}
        or die "ERROR unknown codon: $trip\n";
    $amino->{count}++;
    $trip = $amino->{letter};    # $trip aliases the array element
}

# Percentage of each amino acid relative to the number of translated
# codons (0 for all of them when nothing was translated).
my $tot = scalar @triplets;
for my $amino (values %CODONS) {
    $amino->{percent} = $tot ? $amino->{count} / $tot * 100 : 0;
}

# Write the protein sequence and the statistics to the output file.
open(my $protein_fh, '>', $proteinfile)
    or die("ERROR open file ($proteinfile) ($!)\n");
print {$protein_fh} join(" ", @triplets) . "\n\n";
for my $name (sort keys %CODONS) {
    printf {$protein_fh} "%s = %1.2f%%\n\n", $name, $CODONS{$name}->{percent};
}
# Buffered write errors only surface when the handle is closed.
close $protein_fh or die "ERROR close file ($proteinfile) ($!)\n";

print "Proteinsequence written on 'Protein'\n\n";