Custom Download
* Download a set of packages into a pool structure for use as a partial mirror
{{{#!/usr/bin/perl -w # # Do Custom Download 0.4 # Download a set of packages and sources for a custom deb pool # Copyright (C) 2007 Daniel Dickinson <cshore@wightman.ca> # # This program is free software; you can redistribute it and/or modify # it under the terms version 2 of the GNU General Public License as # published by the Free Software Foundation.
# This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details.
# You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #
# Core modules use IO::File; use strict; use warnings; use File::Temp; use Fcntl qw/F_SETFD F_GETFD/; use Cwd;
# libcarp-clan-perl use Carp::Clan;
# libgetopt-mixed-perl use Getopt::Mixed "nextOption";
# libapt-pkg-perl use ?AptPkg::Config '$_config'; use ?AptPkg::System '$_system'; use ?AptPkg::Version;
use constant TRUE => 1; use constant FALSE => 0;
use constant EXIT_OK => 0; use constant EXIT_BAD_COMMAND_LINE => 1; use constant EXIT_UNREADABLE_FILE => 2; use constant EXIT_REPEATED_FIELD => 3; use constant EXIT_MISSING_FIELD => 4; use constant EXIT_MULTIPLE_STDIN => 5; use constant EXIT_BAD_ARCHIVE => 6; use constant EXIT_MISSING_FIELD_NAME => 7; use constant EXIT_BAD_SOURCE_FILES_FIELD => 8; use constant EXIT_STDIN_ERR => 9; use constant EXIT_FILE_OPEN_FAILED => 10; use constant EXIT_NO_HIGH_FIELDS => 11; use constant EXIT_NO_VERSION_FOR_PACKAGE => 12; use constant EXIT_CHDIR_FAILED => 13; use constant EXIT_WGET_TEMP_CLEARONEXEC_FAILED => 14; use constant EXIT_CWD_CHDIR_FAILED => 15;
use constant GERMINATE_HEADER_LINES => 2;
my $pkg_control_dir = ; my @distros; my @components; my @ignore_missing; my $include_installer = TRUE; my $from_germinate = FALSE; my $quiet = FALSE; my $verbose = 0; my $source_list_filename; my $binary_list_filename; my $pool_dir; my $archive = 'http://archive.debian.org/debian/'; my %binaries; my %sources; my @archs; my $filename; my $wget_list_handle; my $wget_filename; my %apt_ftparchive_file; my $download_size = 0; my $skip_download = FALSE; my $keep_wget_file = FALSE; my @required_binary_fields = qw/Package Version Filename Architecture Size/; my @required_source_fields = qw/Package Version Directory Architecture Files/; # TODO: _?FileList & ?SourceList for dists/components/Packages.gz &parse_command_line(); if ((defined $pkg_control_dir) && ($pkg_control_dir ne
- if (substr ($pkg_control_dir, -1, 1) ne '/') {
- $pkg_control_dir .= '/';
} else {
$pkg_control_dir = ;
}
if ((defined $pool_dir) && ($pool_dir ne )) {
- if (substr ($pool_dir, -1, 1) ne '/') {
- $pool_dir .= '/';
} else {
$pool_dir = ;
}
if ($verbose > 0) {
- print "Base dir for package control files: $pkg_control_dir\n";
}
if ((defined $archive) && ($archive ne )) {
- if (substr ($archive, -1, 1) ne '/') {
- $archive .= '/';
} else {
&print_error_exit("You specified an empty or invalid archive for downloads.\n",
- EXIT_BAD_ARCHIVE);
}
if ($verbose > 0) {
- print "Mirror archive for downloads: $archive\n";
}
if (!$quiet) {
print "Initializing package & source database...";
}
&parse_package_control_files($pkg_control_dir, \@distros, \@components,
- $include_installer);
if (!$quiet) {
- print "ok\n";
}
if (!$quiet) {
- print "Processing package lists...";
}
if (!$keep_wget_file) {
- $wget_list_handle = new File::Temp();
} else {
- $wget_list_handle = new IO::File $wget_filename, "w";
}
&process_package_lists($binary_list_filename, $source_list_filename);
if (!$quiet) {
- print "ok\n";
}
if (!$quiet) {
- print "Initiating download...";
}
if (!$skip_download) {
&do_download();
} else {
- print "skipped\n";
}
sub do_download {
- my $cwd = getcwd();
if ($pool_dir ne ) {
- if (!chdir($pool_dir)) {
- print_error_exit("Unable to change to directory $pool_dir\n",
- EXIT_CHDIR_FAILED);
- print_error_exit("Unable to change to directory $pool_dir\n",
- print_error_exit("Couldn't clear close-on-exec flag on temp filehandle: $wget_list_handle",
- EXIT_WGET_TEMP_CLEARONEXEC_FAILED);
my $wget_verbosity =
;if ($verbose > 1) {
- $wget_verbosity = '-v';
- $wget_verbosity = '-nv';
- $wget_verbosity = '-q';
- print_error_exit("Unable to return to original directory $cwd after downloading files\n",
- EXIT_CWD_CHDIR_FAILED);
- if (!chdir($pool_dir)) {
}
sub process_package_lists {
- my $binary_list_filename = shift; my $source_list_filename = shift; my $package_list_filehandle; my $is_source = FALSE; foreach my $filename ($binary_list_filename, $source_list_filename) {
if (($filename ne ) && ($filename eq $source_list_filename)) {
- $is_source = TRUE;
if ($filename eq
) {- next;
- $package_list_filehandle = new IO::File;
if (!($package_list_filehandle->fdopen(fileno(STDIN),"r"))) {
- print_error_exit("Unable to access standard input.\n",
- EXIT_STDIN_ERR);
- print_error_exit("Unable to access standard input.\n",
- if (!($package_list_filehandle = new IO::File $filename, "r")) {
- print_error_exit("Uable to open file: $filename.\n",
- EXIT_FILE_OPEN_FAILED);
- print_error_exit("Uable to open file: $filename.\n",
&process_package_list($package_list_filehandle, $is_source);
}
sub process_package_list {
- my $package_list_filehandle = shift; my $is_source = shift; my $line; my $line_number = 0; my $package_name;
while ($line = <$package_list_filehandle>) {
- if ($from_germinate) {
if ($line_number < GERMINATE_HEADER_LINES) {
- $line_number++; next;
- if ($line =~ m/^(.*?)(\s*?\|).*/) {
$package_name = &trim($1);
- if ($line =~ m/^(.*?)\s+\|.*/) {
$package_name = &trim($1);
$package_name = &trim($line);
if ((defined $package_name) && ($package_name ne )) {
if ($verbose > 3) {
- print "$package_name "; if (!$is_source) {
- print ': binary\n';
- print ': source\n';
&process_package($package_name, $is_source);
- print "$package_name "; if (!$is_source) {
- if ($from_germinate) {
}
sub process_package {
- my $package_name = shift; my $is_source = shift; my $ver_hash; my $version; my $fields; my $highest_fields; my $highest_version; if (!$is_source) {
- $ver_hash = $binaries{$package_name};
- $ver_hash = $sources{$package_name};
$_config->init; # determine the appropriate system type
$_system = $_config->system; # fetch a versioning system
my $versys = $_system->versioning;
while (($version, $fields) = each (%{$ver_hash})) {
- if (! defined($version)) {
&print_error_exit("No version for '$package_name'",
- EXIT_NO_VERSION_FOR_PACKAGE);
- $highest_version = $version; $highest_fields = $fields;
} elsif ($versys->compare($highest_version, $version) > 0 ) {
if ($verbose > 2) {
- print "ver: $version, high_ver: $highest_version\n";
- } if (! defined ($highest_fields)) {
&print_error_exit("No highest field for '$package_name'",
- EXIT_NO_HIGH_FIELDS);
if (&filter_package($package_name, $highest_fields, $highest_version, $is_source)) {
- my $filepath; my $filehandle; my $directory; if (!$is_source) {
if ($verbose > 2) {
- print $archive . ${$highest_fields}{'Filename'} . "\n";
- my @files = split /\s/, ${$highest_fields}{'Files'}; my @sizes = split /\s/, ${$highest_fields}{'Sizes'}; foreach my $size (@sizes) {
- $download_size += $size;
- $directory = ${$highest_fields}{'Directory'}; $filepath = $directory . '/' . $file;
if ($verbose > 2) {
- print $directory . '/' . $file . "\n";
if ($verbose > 2) {
- print $filepath . "\n";
- my $filename = $dist . '_' . $component . '_source'; my @files = split /\s/, ${$highest_fields}{'Files'}; foreach my $file (@files) {
- if ($file =~ m/.*\.dsc$/) {
- $filepath = $directory . '/' . $file;
&emit_ftparchive_list($filepath, $filename);
- $filepath = $directory . '/' . $file;
- if ($file =~ m/.*\.dsc$/) {
- foreach my $arch (split /\s/, $architecture) {
- my $filename = $dist . '_' . $component . '_debian-installer_binary-' . $arch;
&emit_ftparchive_list($filepath, $filename);
- my $filename = $dist . '_' . $component . '_debian-installer_binary-' . $arch;
- foreach my $arch (split /\s/, $architecture) {
- my $filename = $dist . '_' . $component . '_binary-' . $arch;
&emit_ftparchive_list($filepath, $filename);
- my $filename = $dist . '_' . $component . '_binary-' . $arch;
- my $filepath; my $filehandle; my $directory; if (!$is_source) {
}
sub emit_ftparchive_list {
- my $filepath = shift; my $filename = shift; my $filehandle = $apt_ftparchive_file{$filename}; if (!defined $filehandle) {
- $filehandle = new IO::File $filename, "w"; $apt_ftparchive_file{$filename} = $filehandle;
}
sub filter_package {
- my $package_name = shift; my $field_hash_ref = shift; my $version_hash_ref = shift; my $is_source = shift; my $pkgarch; my $cmdarch;
my $exclude_reason = ; my $excluded = FALSE; {
- if ( index("@components", ${$field_hash_ref}{'Component'}) == -1) {
- $excluded = TRUE; $exclude_reason = "Component ${$field_hash_ref}{'Component'} not included\n"; last;
} elsif ($verbose > 3) {
- print "$package_name in included component\n";
- foreach $cmdarch (@archs) {
if (($pkgarch eq 'any') || ($pkgarch eq $cmdarch)) {
- $included_arch = TRUE; last;
- last;
- $excluded = TRUE; $exclude_reason = "None of archs @{$field_hash_ref}{'Architecture'} included\n"; last;
- $is_installer = FALSE;
if ((!$include_installer) && $is_installer) {
- $excluded = TRUE; $exclude_reason = "Installer package but installer packages are being excluded"; last;
if ($excluded && ($verbose > 0)) {
- print STDERR "$package_name excluded: $exclude_reason";
- if ( index("@components", ${$field_hash_ref}{'Component'}) == -1) {
}
sub trim {
- $_ = $_[0]; if (defined $_) {
- s/^\s+//; s/\s+$//;
}
sub print_error_exit {
- my $error_message = shift; my $error_number = shift; if (!$quiet) {
- print STDERR "$error_message\n";
}
sub parse_command_line {
- my $option; my $option_value; my $print_help = TRUE; my $custom_archs = FALSE;
Getopt::Mixed::init('l=s pkg-control>l','a=s archive>a', 'd=s distro>d', 'c=s components>c', 'n no-installer>n','g germinate>g', 'q quiet>q', 'v verbose>v', 'h help>h', 'k=s architecture>k', 's=s source>s', 'b=s binary>b', 'p=s pool>p', 'skip-download', 'keep-wget-file=s'); while (($option, $option_value) = nextOption()) {
if ((defined $option) && ($option ne )) {
$option_value = &trim($option_value); if ($option eq 'a') {
- # Archive mirror to download from $archive = $option_value; $print_help = FALSE;
- # Directory which contains the package control files $pkg_control_dir = $option_value; $print_help = FALSE;
- # Directory which contains the pool subdirectory $pool_dir = $option_value; $print_help = FALSE;
- # parse comma-separated list of distributions (e.g. dapper, # dapper-updates,dapper-security) my @distros_split = split /,/, $option_value; foreach my $distro (@distros_split) {
if ((defined $distro) && ($distro ne
- chomp $distro;
$distro = &trim($distro); push @distros, $distro;
- # parse comma-separates list of components (e.g. main, # restricted,universe) my @components_split = split /,/, $option_value; foreach my $component (@components_split) {
if ((defined $component) && ($component ne )) {
- push @components, $component;
- # Architectures to include my @cmdarchs = split /,/, $option_value; foreach my $arch (@cmdarchs) {
if ((defined $arch) && ($arch ne )) {
- push @archs, $arch;
- }
- # Don't download installer packages $include_installer = FALSE; $print_help = FALSE;
- # Package list is of the output format generated by # germinated $from_germinate = TRUE; $print_help = FALSE;
- # Run with no messages (even errors) $quiet = TRUE; $print_help = FALSE; $verbose = 0;
- # Display verbose progress messages for debugging purposes. # Level of debugging depends on how many times this option is # specified if (!$quiet) {
- $verbose++;
- $verbose = 0;
- # List of source packages to include $source_list_filename = $option_value;
- # List of binary packages to include $binary_list_filename = $option_value;
- $skip_download = TRUE;
- $keep_wget_file = TRUE; $wget_filename = $option_value;
- # Unknown option so show help info $print_help = TRUE;
- @archs = qw/i386 all/;
$binary_list_filename = ;
$source_list_filename = ;
if (($source_list_filename eq ) && ($binary_list_filename eq )) {
- # No filename for packages to include is an error $print_help = TRUE;
- # Check filename(s) for package to include for read permission, or for # specifying stdin (-) more than once. my $use_stdin = FALSE; foreach my $filename (($binary_list_filename, $source_list_filename)) {
if ($filename ne ) {
- if (! -r $filename) {
- if ($filename eq '-') {
- if ($use_stdin) {
&print_error_exit("Error: You specified stdin as more than once.",
- EXIT_MULTIPLE_STDIN);
&print_error_exit('Error: You don\'t have read permissions on \
- if ($use_stdin) {
- if ($filename eq '-') {
- $filename . "'. Aborting.\n", EXIT_UNREADABLE_FILE);
- if (! -r $filename) {
if ($verbose > 1) {
print "
\n";
- print "Commandline parameters:\n";
print "
\n";
- print "archive mirror: $archive\n"; print "directory with pool subdir: $pool_dir\n"; print "package control base dir: $pkg_control_dir\n"; print "distributions: @distros\n"; print "components: @components\n"; print "architectures: @archs\n"; if ($include_installer) {
- print "include installer\n";
- print "don't include installer\n";
- print "package list from germinate\n";
- print "plain package list (not from germinate)\n";
if ($binary_list_filename ne ) {
- print "binary package list filename: $binary_list_filename\n";
if ($source_list_filename ne
) {- print "source package list filename: $source_list_filename\n";
if ($keep_wget_file ne ) {
- print "wget filename: $wget_filename\n";
- print "skipping download\n";
print "
\n";
- } if ($print_help) {
print <<'EOT';
Custom Apt Pool 0.4 Download a set of packages and sources for a custom deb pool Copyright (C) 2007 Daniel Dickinson <cshore@wightman.ca> Usage: do-custom-download.pl [options] [-a archive] -d=distro[,distro[,...]]
- -c=component[,...] -b binary-package-list-file -s src-package-list-file
-a mirror --archive=mirror : Mirror and root dir from which to download
packages. (e.g http://archive.debian.org/debian).
-l path --pkg-control=path : Where to look for package control files (relative
- to current directory)
-d distro --distro=distro : Comma-separated list of distributions (e.g
- -d sarge,sarge-security). At least one is required
--no-installer : Don't include installer packages -c component,.. --component=... : Comma-separated list of components to
- include (e.g. -c=main,contrib).
-k architecture --architecture=architecture : Comma-separated lists of
- architectures for which to include packages. Defaults to i386,all
-g --from-germinate : Package list is from germinate -p --pool : Directory containing the pool subdirectory -q --quiet : No messages, not even error (exit code only) -v --verbose : Display debugging messages This program is free software; you can redistribute it and/or modify it under the terms of the version 2 of the GNU General Public License as published by the Free Software Foundation. EOT
- Getopt::Mixed::cleanup(); exit EXIT_BAD_COMMAND_LINE;
- } Getopt::Mixed::cleanup();
}
sub parse_package_control_files {
- my $base_dir = shift; my $distros_ref = shift; my $components_ref = shift; my $include_installer = shift; my $distro; my $component; my $value; my $filename; # Package control files are of the form distribution_component_controltype
# where controltype is Packages, ?InstallerPackages, or Sources # So we iterate through the distros and components looking for readable files to parse # (in the base directory specified on the command line, or (if non specified) the current # directory). foreach $distro (@{$distros_ref}) {
- foreach $component (qw/main contrib/) {
- $filename = $base_dir . $distro . '_' . $component . '_Packages'; if (-r $filename) {
if ($verbose > 0) {
- print "Parsing $filename\n";
&parse_package_control_file($filename, $distro, $component, FALSE, FALSE);
} elsif ($verbose > 0) {
- print "Couldn't read $filename\n";
$filename = $base_dir . $distro . '_' . $component . '_?InstallerPackages'; if (-r $filename) {
if ($verbose > 0) {
- print "Parsing $filename\n";
&parse_package_control_file($filename, $distro, $component, TRUE, FALSE);
} elsif ($verbose > 0) {
- print "Couldn't read $filename\n";
if ($verbose > 0) {
- print "Parsing $filename\n";
&parse_package_control_file($filename, $distro, $component, FALSE, TRUE);
} elsif ($verbose > 0) {
- print "Couldn't read $filename\n";
- $filename = $base_dir . $distro . '_' . $component . '_Packages'; if (-r $filename) {
- foreach $component (qw/main contrib/) {
}
sub parse_package_control_package {
- my $package_hash_ref = shift; my $dist = shift; my $component = shift; my $is_installer = shift; my $is_source = shift; my $line_number = shift; my @required_fields; my %pkg_hash; my %ver_hash; my %field_hash; my $ver_hash_ref; my $version; my %pkg_archs; if ($is_source) {
- @required_fields = @required_source_fields;
- @required_fields = @required_binary_fields;
if ((! defined ${$package_hash_ref}{$required_field}) ||
(${$package_hash_ref}{$required_field} eq )) { &print_error_exit("Missing required field '$required_field' at or near line: $line_number",
- EXIT_MISSING_FIELD);
if ((! defined ${$package_hash_ref}{'Source'}) || (${$package_hash_ref}{'Source'} eq )) {
- ${$package_hash_ref}{'Source'} = ${$package_hash_ref}{'Package'};
# Look for the special case of package_name<whitespace>(version) like # gcc-defaults, if a match note the version as well as field value, otherwise # only field value my $field_value = ${$package_hash_ref}{'Source'}; my $source_version; if ($field_value =~ m/^(.+)\s*\((.+)\)/) {
$field_value = &trim($1); $source_version = &trim($2); if ($verbose > 2) {
- print "${$package_hash_ref}{'Package'} has source name '$field_value' and source version '$source_version'\n";
- $source_version = $version;
$field_hash{'?SourceVersion'} = $source_version; $field_hash{'Distro'} = $dist; $field_hash{'Component'} = $component; $field_hash{'Architecture'} = ${$package_hash_ref}{'Architecture'}; $field_hash{'Filename'} = ${$package_hash_ref}{'Filename'}; $field_hash{'Installer'} = $is_installer; $field_hash{'Size'} = ${$package_hash_ref}{'Size'}; # Get current hash with all versions of the current package $ver_hash_ref = $binaries{${$package_hash_ref}{'Package'}}; if (defined $ver_hash_ref) {
- # If version hash exists, add current version to it (or replace # existing version if version string identical ${$ver_hash_ref}{$version} = \%field_hash;
- # Otherwise create new hash and make it the version hash # for the current package $ver_hash{$version} = \%field_hash; $binaries{${$package_hash_ref}{'Package'}} = \%ver_hash;
- my @files; my @sizes; my $has_file = FALSE; foreach my $file_line (split /\n/, ${$package_hash_ref}{'Files'}) {
if ($verbose > 3) {
- print "multi-line raw: $file_line\n";
- push @files, $2; push @sizes, $1; $has_file = TRUE;
if ($verbose > 3) {
- print "Found file '$1'\n";
&print_error_exit("Invalid Files: field (${$package_hash_ref}{'Files'})", EXIT_BAD_SOURCE_FILES_FIELD);
- # If version hash exists, add current version, or replace # current version (if version strings identical) ${$ver_hash_ref}{$version} = \%field_hash;
- # Otherwise create new hash and make it the version hash # for the current package $ver_hash{$version} = \%field_hash; $sources{${$package_hash_ref}{'Package'}} = \%ver_hash;
if ($verbose > 3) {
- my $field; my $value; print "Package: ${$package_hash_ref}{'Package'}\n"; print "Version: $version\n"; while (($field, $value) = each (%field_hash)) {
- print "$field: $value\n";
}
sub parse_package_control_file {
- my $package_control_filename = shift; my $dist = shift; my $component = shift; my $is_installer = shift; my $is_source = shift; my $packages_control_handle = new IO::File $package_control_filename, "r"; my $line; my $line_number = 0;
my $package_name = ; my @line_array; my %package_hash;
my $field_name = ; my $field_value = ; my $new_package = TRUE; my $package_number = 0;
if ($verbose > 3) {
- print "Package#: $package_number\n";
while ($line = <$packages_control_handle>) {
- chomp $line; $line_number++; # In the Packages and Sources control files an initial space on a line means that the line # is the part of a multi-line field. my $first_space = index($line, ' '); # If the current line is an empty line, or not the continuation of a multi-line field, # then the line immediately prior was the completion of a field (and start for single-line fields) # We therefore record the field and value for this package.
if ((! defined $line) || ($line eq ) || (defined $first_space) && ($first_space != 0)) {
if ((defined $field_name) && ($field_name ne
- # If we already have a field of this name something is wrong
if ((defined $package_hash{$field_name}) && ($package_hash{$field_name} ne )) {
&print_error_exit("Repeated field $field_name at line $line_number in $package_control_filename",
- EXIT_REPEATED_FIELD);
- # Otherwise record the field name and value $package_hash{$field_name} = $field_value;
&print_error_exit("Missing field name at line $line_number in $package_control_filename", EXIT_MISSING_FIELD_NAME);
if ((! defined $line) || ($line eq )) {
&parse_package_control_package(\%package_hash, $dist, $component,
- $is_installer, $is_source, $line_number);
$field_name = ; $field_value = ; $new_package = TRUE; %package_hash = (); if ($verbose > 3) {
- print "Package#: $package_number\n";
- # So if we hit such a line, keep the current field name, and add this line to the value of # of the field (as a line)
$line = &trim($line); $field_value .= "$line\n";
- # Otherwise, the field ought to be 'fieldname: value', so split at the colon-space # and call the first item the fieldname, and the rest (space separated), the field value @line_array = split /:\s/, $line;
$field_name = &trim(shift @line_array); $field_value = &trim("@line_array");
if ($field_value eq ) {
- if (substr($line, -1, 1) eq ':') {
- $field_name = substr($line, 0, length($line) - 1);
if ($verbose > 3) {
- print "Found multi-line field '$field_name'\n";
- $field_name = substr($line, 0, length($line) - 1);
- if (substr($line, -1, 1) eq ':') {
} }}}