#!/usr/bin/perl -pi
#
# Convert Mozilla Thunderbird Address Book to correct LDIF-format
# for importing to OpenLDAP-Servers
#
# This script does:
# * check if an "sn" attribute is available for the entry and
#   if not, creates one with the text "not given"
#   (inetOrgPerson has this attribute mentioned as mandatory...)
# * some sanity checks on the dn-attribute (including the value for cn,
#   etc.) and creates a value for cn if none is given
# * decode BASE64 encoded values to make them available for
#   substitutions later on
# * Substitute unicode characters (umlauts, etc.) with the corresponding
#   ASCII-values (although disabled, see comment below)
# * remove the mail attribute from the dn-string and instead fills out
#   all other relevant information (dc, ou, ...)
# * set up the correct objectclasses for mozillaOrgPerson
#   (top -> inetOrgPerson -> mozillaOrgPerson)
# * remove some unsupported attributes (like preferred mail format, etc.)
# * encode previously decoded (and maybe modified) values to BASE64
#
# This script does _NOT_:
# * check if a cn is mentioned more than once
#   (I wanted to implement this, but it would lead to confusion and
#    frustration, because multiple cn's may contain different data
#    and so you have to merge it yourself, over and out)
#
# Invocation of this script:
#   ./this_script ldif-file
#
# Basic-Requirements:
#  * Perl (who would have guessed, hm?)
#  * for BASE64-stuff to work: MIME::Base64 Perl library
#
# by Alexander Griesser [perl@tuxx-home.at]
# 2005-10-04

use MIME::Base64;

# One-time setup.  The -p switch on the shebang line wraps this whole script
# in a per-line loop, so all state lives in package globals and is only
# initialised on the first line (while $sn_exists is still undefined).
unless(defined $sn_exists)
{
  # per-entry flags: was an "sn:" / "cn:" line seen in the current entry?
  ($sn_exists, $cn_exists) = (0, 0);

  # intended running number for auto-generated entry names
  $unknown_index = 1;

  # where to store your contacts? (suffix appended to every rewritten dn)
  $basedn = ",ou=Contacts,dc=molkereiow,dc=local";
}

# Remember whether the current entry carries its own sn / cn attribute
# (both the plain "attr:" and the BASE64 "attr::" form match).
$sn_exists = 1 if /^sn:/;
$cn_exists = 1 if /^cn:/;

# An empty line ends the entry: emit whatever mandatory attribute is still
# missing (these prints happen before -p outputs the empty line itself),
# then reset the flags for the next entry.
if(/^$/)
{
  # inetOrgPerson lists sn as mandatory, so supply a placeholder
  print "sn: not given\n" unless $sn_exists;

  # The generated cn must be the very same value that was written into the
  # dn (e.g. "NoName <number>"), otherwise LDAP will complain with
  # LDAP_NAMING_VIOLATION.
  #
  # No BASE64 handling here: the original author's assumption is that a
  # missing cn only occurs for auto-generated names, which do not contain
  # special characters.
  print "cn: " . $cn . "\n" unless $cn_exists;

  ($sn_exists, $cn_exists) = (0, 0);
}

# Turn a BASE64 line ("attr:: <base64>") into "attr:: <decoded value>" so
# that the substitutions further down operate on the real value.  The double
# colon is kept on purpose: it marks the line for re-encoding at the end of
# the script.
if(my ($attr, $encoded) = /^(\w+):: (.*)/)
{
  $_ = $attr . ":: " . decode_base64($encoded) . "\n";
}

# Rewrite the dn line to "dn: cn=<name><$basedn>".
#
# Expected input is "dn: cn=<name>" optionally followed by ",mail=<addr>";
# the mail part is dropped and $basedn (which starts with a comma) is
# appended instead.  Any dn that does not have this shape (e.g. one that
# only consists of "mail=...") gets a generated, numbered name.
#
# Globals written: $prefix (the "dn:" / "dn::" lead-in including blanks) and
# $cn (the final cn value, reused for the cn attribute of the same entry).
if($_ =~ /^(dn:+\s*)/)
{
  $prefix = $1;
  $cn = $_;

  # remove the \n at the end of the line; chomp (unlike chop) leaves the
  # value intact when the very last line of the file has no newline
  chomp $cn;

  # extract the cn out of the dn-line; on success only the name remains
  $cn =~ s/^dn:+\s*cn=([^=]+)(,\s*mail=.*)??$/$1/;

  # if no cn could be extracted, $cn still starts with "dn:", so we have to
  # check for that and provide a unique name for this entry
  if($cn =~ /^dn:/)
  {
    # uses the counter set up during initialisation, so numbering starts at
    # its initial value and increases by one per unnamed entry
    $cn = "NoName " . $unknown_index++;
  }

  # do some sanity checks on the cn (e.g. may not contain commas, ...)
  $cn =~ s/,/ /g;

  # build a new dn-line
  $_ = $prefix . "cn=" . $cn . $basedn . "\n";
}

# Keep the cn attribute in sync with the cn that was put into the dn:
# retain the "cn:" / "cn::" lead-in and replace the value with $cn.
if(my ($cn_prefix) = /^(cn:+\s*)/)
{
  $_ = "$cn_prefix$cn\n";
}

# substitute UTF-8 encoded characters (umlauts and other stuff) with their
# corresponding single-byte (ISO-8859-1) values
#
# feel free to complete this list or post another way of how this could be
# done better ;)
#
# When importing with phpldapadmin this is not necessary - does the LDAP server
# itself handle this or is it phpldapadmin??
#

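#s/\303\237/\337/g; # sharp s (U+00DF)
#s/\303\244/\344/g; # a with diaeresis (U+00E4)
#s/\303\266/\366/g; # o with diaeresis (U+00F6)
#s/\303\274/\374/g; # u with diaeresis (U+00FC)
#s/\303\204/\304/g; # A with diaeresis (U+00C4)
#s/\303\226/\326/g; # O with diaeresis (U+00D6)
#s/\303\234/\334/g; # U with diaeresis (U+00DC)
#s/\303\241/\341/g; # a with acute (U+00E1)
#s/\303\251/\351/g; # e with acute (U+00E9)
#s/\303\263/\363/g; # o with acute (U+00F3)
#s/\303\272/\372/g; # u with acute (U+00FA)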

# Drop whole lines that must not be imported:
#  * modifytimestamp     - we are not allowed to set this attribute
#  * xmozillausehtmlmail - not supported by the current scheme
for my $unsupported (qr/^modifytimestamp.*\n/, qr/^xmozillausehtmlmail.*\n/)
{
  s/$unsupported//;
}

# Rename the URL attributes: homeurl -> mozillaHomeUrl,
# workurl -> mozillaWorkUrl
$_ =~ s{^homeurl:}{mozillaHomeUrl:};
$_ =~ s{^workurl:}{mozillaWorkUrl:};

# Ensure that the objectclasses are correct: these three are removed ...
for my $obsolete_class (qr/^objectclass: mozillaAbPersonObsolete.*\n/,
                        qr/^objectclass: person.*\n/,
                        qr/^objectclass: organizationalPerson.*\n/)
{
  s/$obsolete_class//;
}

# ... and inetOrgPerson is followed by an additional mozillaOrgPerson line
$_ =~ s{^objectclass: inetOrgPerson.*}{objectclass: inetOrgPerson\nobjectclass: mozillaOrgPerson};

# encode BASE64-decoded lines again
#
# Every line of the form "attr:: <value>" is turned back into
# "attr:: <base64 of value>".
#
# Known limitation: the script sees the file one physical line at a time, so
# a BASE64 value that is already folded over several lines in the input is
# not reassembled before decoding.
if($_ =~ /^(\w+):: (.*)/s)
{
  # The /s modifier lets the value span embedded newlines: a decoded value
  # may itself contain "\n" (multi-line notes, addresses, binary data), and
  # without /s everything after the first newline would be silently dropped.

  $prefix = $1;
  my $value = $2;

  # with /s the capture includes the line terminator; it is not part of the
  # value, so take exactly one newline off again
  chomp $value;

  # since base64 encoded strings may be longer than n bytes and get
  # automatically wrapped around to the next line, we have to take care of
  # that: LDIF continuation lines start with a single whitespace, so one is
  # inserted after every newline inside the encoded string (the final
  # newline is removed first and added back when the line is built)
  $base64str = encode_base64($value);
  chomp $base64str;
  $base64str =~ s/\n/\n /g;
  $_ = sprintf("%s:: %s\n", $prefix, $base64str);
}