#!/usr/bin/perl
# Task: Extract GeneID-Number and gene information
#
# Reads the GFF file "Genomteil.gff" and prints one line per gene in the form
#   <GeneID> => <start position>
# For every gene the script collects start, end and strand, plus the feature
# type of the line that follows the gene line when that type is CDS or exon.
use strict;
use warnings;

# parse_gff_genes($fh)
#
# Reads tab-separated GFF lines from the open filehandle $fh and returns a
# hash reference: GeneID => [ start, end, strand, optional "CDS"/"exon" ].
#
# A record is opened by a line whose type column (column 3) matches /gene/
# and whose attribute column (column 9) carries "db_xref=GeneID:<digits>".
# The record is closed by the next data line; if that line's type matches
# CDS or exon, the type is appended to the record.
sub parse_gff_genes {
    my ($fh) = @_;

    my %record_for;     # GeneID => array ref with the collected fields
    my $pending_id;     # GeneID of the gene still waiting for its next line
    my @pending;        # fields collected so far for that gene

    LINE:
    while (my $line = <$fh>) {
        # Strip LF as well as CRLF so the attribute match below does not
        # depend on the line terminator.
        $line =~ s/\r?\n\z//;

        # Directives ("##gff-version"), comments and blank lines are not
        # feature records.
        next LINE if $line =~ /\A\s*(?:\#|\z)/;

        # Limit -1 keeps trailing empty columns; anything with fewer than
        # nine columns (e.g. FASTA sequence lines) cannot be a feature line.
        my @fields = split /\t/, $line, -1;
        next LINE if @fields < 9;

        my $type    = $fields[2];
        my $is_gene = $type =~ /gene/;

        if (defined $pending_id) {
            # Close the open gene. A directly following gene line closes it
            # too, but is then handled as a new gene below instead of being
            # swallowed.
            push @pending, $type if !$is_gene && $type =~ /CDS|exon/;
            $record_for{$pending_id} = [@pending];
            undef $pending_id;
        }
        next LINE if !$is_gene;

        # The ID is taken from the gene line itself, so a gene without an ID
        # can never inherit (and overwrite) the record of an earlier gene.
        if ($fields[8] =~ /(?:\A|;)\s*db_xref=GeneID:(\d+)/) {
            $pending_id = $1;
            @pending    = @fields[3, 4, 6];     # start, end, strand
        }
        else {
            warn "line $.: gene record without db_xref=GeneID, skipped\n";
        }
    }

    # A gene on the last data line has no following line to close it.
    $record_for{$pending_id} = [@pending] if defined $pending_id;

    return \%record_for;
}

# main($file)
#
# Parses the GFF file $file and prints "GeneID => start" for every gene,
# ordered by numeric GeneID. Dies if the file cannot be opened.
sub main {
    my ($file) = @_;

    open my $in, '<', $file or die "Cannot open $file: $!";
    my $genes = parse_gff_genes($in);
    close $in;

    for my $gene_id (sort { $a <=> $b } keys %{$genes}) {
        # Each hash value is an array reference; element 0 is the gene start.
        print "$gene_id => $genes->{$gene_id}[0]\n";
    }
    return;
}

# Run only when executed as a script; when the file is loaded with
# require/do (e.g. from a test) only the subs are defined.
main('Genomteil.gff') unless caller;

1;