#!/usr/bin/perl
# Task: Extract the GeneID number and gene information from a GFF file.
#
# Usage:   perl this_script.pl [input.gff]
#          (the input defaults to "Genomteil.gff" when no argument is given)
#
# The file is read line by line. Every line whose feature type (column 3)
# contains "gene" starts a new gene block; its start, end and strand
# (columns 4, 5 and 7) are recorded. The GeneID is taken from a
# "db_xref=GeneID:<number>" attribute in column 9. For every following
# CDS/exon line the feature type is appended to the record of the current
# gene. Finally one "GeneID --> value" line is printed per recorded value.

use strict;
use warnings;

# Input path: first command-line argument, otherwise the historical default.
my $gff_path = @ARGV ? $ARGV[0] : 'Genomteil.gff';

my %gene_info_for;    # GeneID => [ start, end, strand, feature types ... ]
my $gene_id;          # GeneID of the gene block being read; undef until seen
my @gene_info;        # values collected so far for the current gene block

open my $in, '<', $gff_path
    or die "Cannot open '$gff_path' for reading: $!\n";

LINE:
while (my $line = <$in>) {

    # Strip LF as well as CRLF line endings so Windows-style files work too.
    $line =~ s/\r?\n\z//;

    # Directives/comments ("##gff-version ...") and blank lines carry no
    # feature data.
    next LINE if $line =~ /\A#/ or $line !~ /\S/;

    # A feature line has (at least) nine tab-separated columns; anything
    # shorter cannot be interpreted and would only yield undefined fields.
    my @columns = split /\t/, $line, -1;
    next LINE if @columns < 9;

    my ($type, $start, $end, $strand, $attributes) = @columns[2, 3, 4, 6, 8];

    if ($type =~ /gene/) {
        # Start of a new gene block: begin a fresh record and forget the
        # previous GeneID so that a gene without a GeneID of its own can
        # never overwrite the entry of the gene before it.
        @gene_info = ($start, $end, $strand);
        $gene_id   = undef;
    }

    # The GeneID may appear anywhere in the attribute column, not only as
    # the very last attribute on the line.
    if ($attributes =~ /(?:\A|;)\s*db_xref=GeneID:(\d+)/) {
        $gene_id = $1;
    }

    # CDS and exon lines contribute their feature type to the current record.
    if ($type =~ /CDS|exon/) {
        push @gene_info, $type;
    }

    # Nothing can be stored until a GeneID is known for the current block.
    next LINE if !defined $gene_id;

    # Store a copy so later pushes for another gene do not alter this entry.
    $gene_info_for{$gene_id} = [@gene_info];
}

close $in
    or warn "Could not close '$gff_path': $!\n";

# Report the records in ascending GeneID order so the output is reproducible.
for my $id (sort { $a <=> $b } keys %gene_info_for) {
    for my $value (@{ $gene_info_for{$id} }) {
        print "$id --> $value\n";
    }
}