#This page contains several Perl contructs that we will commonly use in the
#NCBI PowerScripting course. In the Perl scripts we will provide, we attempt to
#use these constructs as consistently as possible. |
#all text following a "#" in perl is treated as a comment
#ARGV[0] is the first commandline parameter, $ARGV[1] is the second.
open (INPUT,"<$ARGV[0]")||die "Aborting. Can't open $ARGV[0].\n";
while(<INPUT>){
#do something with the current record, the current line by default
#the current record is always available in the variable $_
chomp $_; #remove newline from $_
print "$_\n"; #print the current record with newline replaced
}
close(INPUT);
#Parsing a record in perl (by default, the record is a single line) |
$delimiter=",";
@fields=split("$delimiter",$_);
#$_ is the current record, $delimeter is a perl
#regular expression that defines the field separator
#in this case, the separator is a comma
#join is the inverse of split
$all_fields=join("$delimiter",@fields);
#Traversing a hash table (associative array) |
#In Perl, hashes are arrays indexed by arbitrary strings called keys, each of which
#points to a single data element.
#Hash names begin with the "%" character. However, when referencing a hash value,
#the dollar sign precedes the name and the key goes in curly brackets "{}": $hash{$key}
foreach $index (keys %hash){
#print each key and value in hash table called "hash" separated by a tab character
print "$index\t$hash{$index}\n";
}
#Working with Arrays in Perl |
#array names in perl begin with the "@" character
$bigdata="all,of,GenBank,and,RefSeq,in|a|big|file";
#Split also allows multiple delimiters enclosed in square brackets "[]"
@bigdata_array=split(/[,|]/,$bigdata);
foreach $bigitem (@bigdata_array){
#The =~ operator tests for the presence of the regular expression, in this case "a", in $bigitem.
#Push appends $bigitem to the end of the array @smalldata.
if ($bigitem =~ /a/){push (@smalldata,$bigitem);}
}
print @smalldata;
print join("\n",@smalldata);
#Accessing the Internet Using Perl |
#For retrieving data from a url post in perl, we will use the LWP module "get" function
use LWP::Simple; #supports the "get" function
$baseurl="http://eutils.ncbi.nlm.nih.gov/entrez/eutils/";
$eutil="esearch.fcgi?";
$parameters="db=nucleotide&term=human[orgn]+AND+jak3";
$url=$baseurl.$eutil.$parameters;
$raw=get($url);
print $raw;
#eutility output is given in XML by default
#we can parse the XML output in $raw using the following construction
#split $raw into an array of lines, @lines
@lines=split(/^/,$raw);
foreach $line (@lines){
#if a line contains an XML ID tag, the id, a string of digits, is captured and
#pushed onto an array called @ids
if ($line=~/<ID>(\d+)<\/ID>/){push(@ids,$1);}
}
#Using Subroutines in Perl |
#parameters are pased to a subroutine using a parameter hash table
$params{"term"}="mouse[orgn]";
%results=egquery(%params);
sub egquery {
#Parameters passed to a subroutine are stored in the special array @_
local %params=@_;
#Do something here with the parameters, putting the results in %results
#Then pass the results back to the subroutine call
return(%results);
}