04_fill_empty_lists_from_prefix_list.pl

This Perl script generates suggestions for unsuccessful UniProt queries. For every such prefix, all strings starting with that prefix are selected, and a random sample of them is saved as the list of offered suggestions.

This is the documentation of all downloads.

Move your mouse over the code area and look at its top right corner where a set of icons for downloading, copy-pasting, etc. will appear.


#!/usr/bin/perl
use strict;
use warnings;

use List::Util 'shuffle';
use Tree::Prefix;

# =============== parameters ===============

our $limit=10;		# number of suggestions to show
our $min_char=2;	# prefixes shorter than this are never filled

# ======================= initialize =======================

# species codes; each one selects a prefixes_<species>.dat input file and a
# base directory of the same name below the tree's working directory
my @species=qw/drome caeel human/;

# direct method call: the indirect-object form (new Tree::Prefix) is
# ambiguous to the parser and discouraged by perlobj
our $tree=Tree::Prefix->new;
my $pwd=$tree->pwd;
$tree->{limit}=$limit;

# ======================= read from file =======================

# for every species: point the tree at the species directory, load its
# prefix file, then fill every empty suggestion list
for my $species (@species) {
	print STDERR "loading species: $species\n";

	$tree->set_basedir($pwd."/".$species);
	$tree->load_file_to_hash("$pwd/prefixes_$species.dat");

	fill_up_lists();
}

# ========== fill empty lists with random selection of completions ==========
# Recursively walks the prefixes reported by $tree->list_dir. For every prefix
# that is at least $min_char characters long and for which $tree->list_files
# returns an empty list, a random sample of at most $limit completions is
# stored as its list of suggestions.
#
# Arguments: the current prefix (optional; defaults to "" for the initial call).
# Uses the file-level $tree, $limit and $min_char.
sub fill_up_lists {
	# when called recursively, current prefix is expected as argument
	my $prefix=@_ ? shift : "";

	for my $letter (@{$tree->list_dir($prefix)}) {
		my $child=$prefix.$letter;

		# fill up empty and long enough prefixes
		if (length($child) >= $min_char && @{$tree->list_files($child)} == 0) {
			print "filling $child\n";

			# NOTE(review): candidates are read through $tree->{hash} and
			# written through $tree->{dir}; presumably these are the loaded
			# hash and the on-disk tree -- confirm against Tree::Prefix
			my $ls=$tree->{hash}->list_files($child);

			# keep at most $limit random elements; the last index is
			# $limit-1 (the slice 0..$limit would keep one element too many)
			my @ls=shuffle(@$ls);
			@ls=@ls[0..min($limit-1, $#ls)];

			$tree->{dir}->insert_files($child, \@ls);
		}

		# descend regardless of whether this prefix was filled
		fill_up_lists($child);
	}
}

# ========== smallest element of given parameters ==========
# Returns the numerically smallest argument. Called without arguments it
# returns undef (an empty list in list context).
sub min {
	return unless @_;

	# seed with the first argument instead of a fixed sentinel, so lists
	# whose values all exceed any chosen constant are still handled correctly
	my ($min, @rest)=@_;
	for my $candidate (@rest) {
		if ($candidate<$min) { $min=$candidate }
	}

	return $min;
}