Python Namespace Packages

Namespace packages are a way of letting many individual packages provide modules inside a commonly-named package. For example, the PEAK framework consists of many individual packages that provide modules inside the peak.util namespace: the DecoratorTools package provides the peak.util.decoratortools module, and other packages may provide other modules inside the same peak.util namespace.

The problem

As the packages from a big distribution such as PEAK or Zope are usually independent of each other, they will usually become separate Debian packages. For a module such as peak.util.decoratortools to work, the peak and peak/util directories must each contain a blank file called __init__.py, which is what makes them importable packages.

If each package providing a module inside the peak.util namespace distributes its own version of the placeholder file, one would try to overwrite the other at install time, and the packages would not be installable side-by-side.

The solutions

One possible solution is to use dpkg-divert, but that would not be a viable solution, as the number of diversions would be the same as the number of packages installed minus one, and it would be very difficult to choose which one to use as the real file.

Another solution would be to have special packages providing the directory structure for the namespaces. We would have, for instance, a python-ns-peak package and a python-ns-peak-util package depending on it; python-decoratortools would then not ship the __init__.py files itself, but would depend on python-ns-peak-util instead. That solution would make the number of placeholder packages in the archive grow, which seems to be an unacceptable overhead for this problem.

A third, more workable solution would be to have some tool handle the "installation" of namespaces, creating the placeholder files as needed and removing them once the last package using that namespace has been removed. A first attempt at that solution is listed below. It is a patch to python-support that finds the namespaces file (namespace_packages.txt inside the egg-info directory) and creates the needed __init__.py files.

One open issue is whether this solution will integrate correctly with python-central-using packages, and how. Testing shows that it does integrate nicely with easy_installed packages.

See http://kov.eti.br/~kov/python-ns/ for examples.

diff -urN python-support-0.6.4.old/update-python-modules python-support-0.6.4/update-python-modules
--- python-support-0.6.4.old/update-python-modules      2007-05-08 13:32:47.000000000 -0300
+++ python-support-0.6.4/update-python-modules  2007-05-30 13:55:06.000000000 -0300
@@ -154,6 +154,37 @@
 def clean_modules_gen(versions):
   return clean_modules
 
+def namespace_is_empty(dirpath):
+  dircontents=os.listdir(dirpath)
+  for item in ['__init__.py'+x for x in ['', 'c', 'o']]:
+    try:
+      dircontents.remove(item)
+    except ValueError:
+      pass
+  if len(dircontents) != 0:
+    return False
+  return True
+
+def clean_namespaces(basedir):
+  debug("Cleaning namespaces for %s..."%(basedir))
+  for ns in get_namespaces(basedir):
+    for py in dir_versions(basedir):
+      dirpath=os.path.join(basepath,py,ns)
+      if not os.path.exists(dirpath):
+        continue
+      initpath=os.path.join(dirpath,'__init__.py')
+      # we must check if the placeholder files are the
+      # only ones holding this 'namespace'; if there are
+      # more files/directories here this namespace has other
+      # users
+      if namespace_is_empty(dirpath):
+        to_remove=[initpath+x for x in ['', 'c', 'o']]
+        for path in to_remove:
+          if os.path.exists(path):
+            debug("remove "+path)
+            os.remove(path)
+        os.removedirs(dirpath)
+
 def process(basedir,func):
   debug("Looking at %s..."%(basedir))
   for dir, dirs, files in os.walk(basedir):
@@ -174,6 +205,62 @@
       if os.path.isdir(verdir):
         process(verdir,func([vers]))
 
+def find_egg_info_directory(basedir):
+  dirlist=os.listdir(basedir)
+  for f in dirlist:
+    file_name=os.path.join(basedir,f)
+    if file_name.endswith(".egg-info") and os.path.isdir(file_name):
+      return file_name
+  return None
+
+def find_namespaces_file(basedir):
+  egginfo_dir=find_egg_info_directory(basedir)
+  if egginfo_dir:
+    return os.path.join(egginfo_dir, "namespace_packages.txt")
+  else:
+    return None
+
+def sort_by_string_size(a, b):
+  la = len(a)
+  lb = len(b)
+  if la < lb:
+    return -1
+  elif la > lb:
+    return 1
+  else:
+    return 0
+
+def get_namespaces(basedir):
+  namespaces_file=find_namespaces_file(basedir)
+  if namespaces_file and os.path.exists(namespaces_file):
+    debug("Namespaces file found at "+namespaces_file)
+    try:
+      f=open(namespaces_file)
+      namespaces=[x.replace('.', '/').strip() for x in f]
+      f.close()
+      # sort the namespaces by the size of their name, in reverse
+      # so that deeper namespaces are handled first (mainly for
+      # cleaning)
+      namespaces.sort(sort_by_string_size, reverse=True)
+      return namespaces
+    except OSError, e:
+      debug("Failed to open the namespaces file: " + e.message)
+  return []
+
+def process_namespaces(basedir, version):
+  debug("Looking for namespaces on " + basedir)
+  destpath=os.path.join(basepath,version)
+  for namespace in get_namespaces(basedir):
+    initpath=os.path.join(destpath, namespace, '__init__.py')
+    debug("Namespace " + namespace + " should be registered as " + initpath)
+    if not os.path.exists(initpath):
+      debug("Registering namespace " + namespace + " on " + initpath)
+      try:
+        open(initpath, 'w').close()
+      except OSError, e:
+        debug("Failed to write the stub __init__.py in " + initpath)
+        debug("Error was: " + e.message)
+
 def dirlist_file(f):
   return [ l.rstrip('\n') for l in file(f) if len(l)>1 ]
 
@@ -269,6 +356,7 @@
     for basedir in dirs_i:
       process(basedir,install_modules([pyver]))
       process_extensions(basedir,install_modules,pyver)
+      process_namespaces(basedir, pyver)
     # Byte-compile after running install_modules
     bytecompile_all(pyver)
   if pyver not in py_installed and os.path.isdir(dir):
@@ -287,16 +375,20 @@
     bytecompile_privatedir(basedir)
   else:
     process(basedir,clean_simple)
+    clean_namespaces(basedir)
 
 to_bytecompile=to_clean=[]
 for basedir in do_dirs_i:
   if not options.clean_mode:
     process(basedir,install_modules(py_installed))
     process_extensions(basedir,install_modules)
+    for ver in py_installed:
+      process_namespaces(basedir, ver)
     to_bytecompile = concat(to_bytecompile,isect(dir_versions(basedir),py_installed))
   else:
     process(basedir,clean_modules)
     process_extensions(basedir,clean_modules_gen)
+    clean_namespaces(basedir)
     to_clean = concat(to_clean,isect(dir_versions(basedir),py_installed))
 # Byte-compile after running install_modules
 for py in to_bytecompile: