# Copyright 2005-2019 Davide Alberani <da@erlug.linux.it>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""
This package provides the IMDbSqlAccessSystem class used to access
IMDb's data through a SQL database. Every database supported by the SQLAlchemy
object relational mapper is available.

The :func:`imdb.IMDb` function will return an instance of this class
when called with the ``accessSystem`` parameter set to "sql",
"database" or "db".
"""

import re
from difflib import SequenceMatcher
from codecs import lookup

from imdb import IMDbBase
from imdb.utils import normalizeName, normalizeTitle, build_title, \
    build_name, analyze_name, analyze_title, \
                        canonicalTitle, canonicalName, re_titleRef, \
                        build_company_name, re_episodes, _unicodeArticles, \
                        analyze_company_name, re_year_index, re_nameRef
from imdb.Person import Person
from imdb.Movie import Movie
from imdb.Company import Company
from imdb.parser.sql.logging import logger
from imdb._exceptions import IMDbDataAccessError, IMDbError


# Logger for miscellaneous functions: a child (named 'aux') of the package
# logger, used by the module-level helpers defined in this file.
_aux_logger = logger.getChild('aux')

# =============================
# Things that once upon a time were in imdb.parser.common.locsql.


def titleVariations(title, fromPtdf=False):
    """Return the (title1, title2, title3) variations useful for searches.

    title1 is the canonical title; title2 is title1 without its trailing
    article (or title1 unchanged); title3 is the long imdb canonical title,
    or an empty string when just a bare title was given.
    If fromPtdf is true, the input is assumed to be in the plain text
    data files format."""
    variant2 = ''
    if not (fromPtdf or re_year_index.search(title)):
        # Just a title: only the canonical form can be built.
        variant1 = canonicalTitle(title)
        variant3 = ''
    else:
        # A (year[/imdbIndex]) indication seems to be present (or the ptdf
        # format implies it): assume a long imdb canonical title.
        parsed = analyze_title(title, canonical=1)
        variant1 = parsed['title']
        isEpisode = parsed['kind'] == 'episode'
        if isEpisode:
            variant1 = normalizeTitle(variant1)
        if fromPtdf and not isEpisode:
            # The input already is the long imdb canonical title.
            variant3 = title
        else:
            variant3 = build_title(parsed, canonical=1, ptdf=1)
    if variant1:
        chunks = variant1.split(', ')
        if chunks[-1].lower() in _unicodeArticles:
            variant2 = ', '.join(chunks[:-1])
        else:
            variant2 = variant1
    _aux_logger.debug('title variations: 1:[%s] 2:[%s] 3:[%s]',
                      variant1, variant2, variant3)
    return variant1, variant2, variant3


# Matches a parenthesized imdbIndex made of roman-numeral letters, e.g. "(II)".
re_nameIndex = re.compile(r'\(([IVXLCDM]+)\)')


def nameVariations(name, fromPtdf=False):
    """Return the (name1, name2, name3) variations useful for searches.

    name1 is the name in the canonical format; name2 is the name in the
    normal format (empty if equal to name1); name3 is the canonical name
    with the imdbIndex (empty if not available).
    If fromPtdf is true, the input is assumed to be in the plain text
    data files format."""
    longName = ''
    if not fromPtdf and not re_nameIndex.search(name):
        # A plain name, without any (imdbIndex).
        canonical = canonicalName(name)
    else:
        parsed = analyze_name(name, canonical=1)
        canonical = parsed['name']
        if not fromPtdf:
            longName = build_name(parsed, canonical=1)
        elif 'imdbIndex' in parsed:
            # In the ptdf format the input already is the long name.
            longName = name
    normal = normalizeName(canonical)
    if normal == canonical:
        normal = ''
    _aux_logger.debug('name variations: 1:[%s] 2:[%s] 3:[%s]',
                      canonical, normal, longName)
    return canonical, normal, longName


def ratcliff(s1, s2, sm):
    """Ratcliff-Obershelp similarity between s1 and s2.

    sm is a difflib.SequenceMatcher whose first sequence is expected to
    have been set by the caller; only its second sequence is replaced
    here, with the lowercased s2.

    Return 0.0, without comparing, when either string is empty or when
    the shorter string is less than 70% as long as the longer one;
    otherwise return sm.ratio()."""
    STRING_MAXLENDIFFER = 0.7
    s1len = len(s1)
    s2len = len(s2)
    if not s1len or not s2len:
        # Nothing to match on; this also prevents the ZeroDivisionError
        # the length ratio below would raise for two empty strings.
        return 0.0
    threshold = float(min(s1len, s2len)) / max(s1len, s2len)
    if threshold < STRING_MAXLENDIFFER:
        return 0.0
    sm.set_seq2(s2.lower())
    return sm.ratio()


def merge_roles(mop):
    """Merge multiple roles: items of mop that refer to the same object
    are collapsed in the first one found, whose currentRole becomes a list
    collecting the roles of every duplicate."""
    merged = []
    for item in mop:
        foundAt = None
        if isinstance(item, Person):
            # Persons are matched using isSamePerson().
            for position, known in enumerate(merged):
                if known.isSamePerson(item):
                    foundAt = position
                    break
        elif item in merged:
            foundAt = merged.index(item)
        if foundAt is None:
            merged.append(item)
            continue
        kept = merged[foundAt]
        if not isinstance(kept.currentRole, list):
            kept.currentRole = [kept.currentRole]
        kept.currentRole.append(item.currentRole)
    return merged


def scan_names(name_list, name1, name2, name3, results=0, ro_thresold=None,
               _scan_character=False):
    """Scan a list of (key, nameData) pairs, searching for best matches
    against the given name variations.

    Return a list of (ratio, (key, nameData)) tuples, best match first,
    keeping only the ratios not below ro_thresold (0.6 by default) and
    at most 'results' items, when results is greater than zero."""
    threshold = 0.6 if ro_thresold is None else ro_thresold
    matcher1 = SequenceMatcher()
    matcher1.set_seq1(name1.lower())
    matcher2 = SequenceMatcher()
    if name2:
        matcher2.set_seq1(name2.lower())
    matcher3 = SequenceMatcher()
    if name3:
        matcher3.set_seq1(name3.lower())
    best = {}
    for key, entry in name_list:
        dbName = entry['name']
        # Distance with the canonical name.
        scores = [ratcliff(name1, dbName, matcher1) + 0.05]
        if _scan_character:
            surname = dbName.split(' ', 1)[-1]
            fullName = dbName
        else:
            head, separator, tail = dbName.partition(', ')
            surname = head
            fullName = '%s %s' % (tail, head) if separator else ''
        if surname != dbName:
            # Distance with the "Surname" in the database.
            scores.append(ratcliff(name1, surname, matcher1))
            if not _scan_character:
                scores.append(ratcliff(name1, fullName, matcher1))
            if name2:
                scores.append(ratcliff(name2, surname, matcher2))
                # Distance with the "Name Surname" in the database.
                if fullName:
                    scores.append(ratcliff(name2, fullName, matcher2))
        if name3:
            # Distance with the long imdb canonical name.
            longName = build_name(entry, canonical=1)
            scores.append(ratcliff(name3, longName, matcher3) + 0.1)
        score = max(scores)
        if score < threshold:
            continue
        previous = best.get(key)
        if previous is None or score > previous[0]:
            best[key] = (score, (key, entry))
    ranked = sorted(best.values())
    ranked.reverse()
    if results > 0:
        ranked = ranked[:results]
    return ranked


def scan_titles(titles_list, title1, title2, title3, results=0,
                searchingEpisode=0, onlyEpisodes=0, ro_thresold=None):
    """Scan a list of titles, searching for best matches against
    the given variations.

    titles_list yields (key, titleData) pairs, where titleData is a
    dictionary with at least a 'title' entry; title1, title2 and title3
    are the searched variations (canonical title, title without article,
    long imdb canonical title).
    If onlyEpisodes is true, only items of kind 'episode' are considered
    and they are compared by title alone; if searchingEpisode is true (it
    is forced when title3 ends with '}') only episodes are matched, using
    title3; otherwise episodes are skipped.

    Return a list of (ratio, (key, titleData)) tuples, best match first,
    keeping only the ratios not below ro_thresold (0.6 by default) and
    at most 'results' items, when results is greater than zero."""
    if ro_thresold is not None:
        RO_THRESHOLD = ro_thresold
    else:
        RO_THRESHOLD = 0.6
    sm1 = SequenceMatcher()
    sm2 = SequenceMatcher()
    sm3 = SequenceMatcher()
    sm1.set_seq1(title1.lower())
    # ratcliff() only replaces the second sequence of the matcher: the
    # searched title must be stored as the first one, otherwise every
    # ratio computed with sm2 is measured against an empty string.
    sm2.set_seq1(title2.lower())
    if title3:
        sm3.set_seq1(title3.lower())
        if title3[-1] == '}':
            searchingEpisode = 1
    # True if the searched title carries an article.
    hasArt = title2 != title1
    resd = {}
    for i, t_data in titles_list:
        if onlyEpisodes:
            if t_data.get('kind') != 'episode':
                continue
            til = t_data['title']
            # endswith() is safe on an empty title, unlike til[-1].
            if til.endswith(')'):
                dateIdx = til.rfind('(')
                if dateIdx != -1:
                    til = til[:dateIdx].rstrip()
            if not til:
                continue
            ratio = ratcliff(title1, til, sm1)
            if ratio >= RO_THRESHOLD:
                resd[i] = (ratio, (i, t_data))
            continue
        if searchingEpisode:
            if t_data.get('kind') != 'episode':
                continue
        elif t_data.get('kind') == 'episode':
            continue
        til = t_data['title']
        # Distance with the canonical title (with or without article).
        #   titleS      -> titleR
        #   titleS, the -> titleR, the
        if not searchingEpisode:
            til = canonicalTitle(til)
            ratios = [ratcliff(title1, til, sm1) + 0.05]
            # til2 is til without the article, if present.
            til2 = til
            tils = til2.split(', ')
            matchHasArt = False
            if tils[-1].lower() in _unicodeArticles:
                til2 = ', '.join(tils[:-1])
                matchHasArt = True
            if hasArt and not matchHasArt:
                #   titleS[, the]  -> titleR
                ratios.append(ratcliff(title2, til, sm2))
            elif matchHasArt and not hasArt:
                #   titleS  -> titleR[, the]
                ratios.append(ratcliff(title1, til2, sm1))
        else:
            ratios = [0.0]
        if title3:
            # Distance with the long imdb canonical title.
            ratios.append(ratcliff(title3,
                                   build_title(t_data, canonical=1, ptdf=1), sm3) + 0.1)
        ratio = max(ratios)
        if ratio >= RO_THRESHOLD:
            if i in resd:
                if ratio > resd[i][0]:
                    resd[i] = (ratio, (i, t_data))
            else:
                resd[i] = (ratio, (i, t_data))
    res = list(resd.values())
    res.sort()
    res.reverse()
    if results > 0:
        res[:] = res[:results]
    return res


def scan_company_names(name_list, name1, results=0, ro_thresold=None):
    """Scan a list of (key, companyName) pairs, searching for best matches
    against the given name.  Notice that the names in the list are
    strings, and not dictionaries.

    Return a list of (ratio, (key, analyzedName)) tuples, best match
    first, keeping only the ratios not below ro_thresold (0.6 by default)
    and at most 'results' items, when results is greater than zero."""
    threshold = 0.6 if ro_thresold is None else ro_thresold
    matcher = SequenceMatcher()
    matcher.set_seq1(name1.lower())
    searchHasCountry = name1.endswith(']')
    best = {}
    for key, fullName in name_list:
        candidate = fullName
        penalty = 0.0
        if not searchHasCountry and fullName.endswith(']'):
            # Compare without the trailing [country], with a small penalty.
            bracket = fullName.rfind('[')
            if bracket != -1:
                candidate = fullName[:bracket].rstrip()
                penalty = -0.05
        # Distance with the company name.
        score = ratcliff(name1, candidate, matcher) + penalty
        if score < threshold:
            continue
        if key not in best or score > best[key][0]:
            best[key] = (score, (key, analyze_company_name(fullName)))
    ranked = sorted(best.values())
    ranked.reverse()
    if results > 0:
        del ranked[results:]
    return ranked


# Digit associated to each consonant by soundex(); letters missing from
# this table get no digit.
_translate = {
    'B': '1', 'C': '2', 'D': '3', 'F': '1', 'G': '2', 'J': '2',
    'K': '2', 'L': '4', 'M': '5', 'N': '5', 'P': '1', 'Q': '2',
    'R': '6', 'S': '2', 'T': '3', 'V': '1', 'X': '2', 'Z': '2',
}
_translateget = _translate.get
# Matches the leading run of characters outside a-z (case-insensitive).
_re_non_ascii = re.compile(r'^[^a-z]*', re.I)
# Maximum length of the soundex code.
SOUNDEX_LEN = 5


def soundex(s):
    """Return the soundex code for the given string, or None if nothing
    is left once the leading non-letter characters are removed."""
    stripped = _re_non_ascii.sub('', s)
    if not stripped:
        return None
    upper = stripped.upper()
    # The code starts with the first letter, followed by the digits of the
    # next letters; letters without a digit and repetitions of the last
    # added symbol are skipped.
    symbols = [upper[0]]
    for char in upper[1:]:
        digit = _translateget(char, '0')
        if digit == '0' or symbols[-1] == digit:
            continue
        symbols.append(digit)
    return ''.join(symbols)[:SOUNDEX_LEN] or None


def _sortKeywords(keyword, kwds):
    """Sort a list of keywords, from the most to the least similar
    to the searched one."""
    matcher = SequenceMatcher()
    matcher.set_seq1(keyword.lower())
    # Substring matches are rewarded only for long enough keywords.
    checkContained = len(keyword) > 4
    scored = []
    for candidate in kwds:
        score = ratcliff(keyword, candidate, matcher)
        if candidate.startswith(keyword):
            score += 0.5
        elif checkContained and keyword in candidate:
            score += 0.3
        scored.append((score, candidate))
    scored.sort(reverse=True)
    return [candidate for _score, candidate in scored]


def filterSimilarKeywords(keyword, kwdsIterator):
    """Return a sorted list of keywords similar to the one given.

    kwdsIterator yields (id, keyword) pairs; a keyword is kept (once) if
    it contains the searched one, for searched keywords longer than four
    characters, or if it has the same soundex code."""
    targetCode = soundex(keyword)
    lookInside = len(keyword) > 4
    seen = set()
    similar = []
    for _movieID, candidate in kwdsIterator:
        if candidate in seen:
            continue
        seen.add(candidate)
        if (lookInside and keyword in candidate) or \
                soundex(candidate) == targetCode:
            similar.append(candidate)
    return _sortKeywords(keyword, similar)


# =============================

# Information regrouped under the 'literature' section.
_litlist = [
    'screenplay/teleplay',
    'novel',
    'adaption',
    'book',
    'production process protocol',
    'interviews',
    'printed media reviews',
    'essays',
    'other literature',
]
_litd = {entry: ('literature', entry) for entry in _litlist}

# Information regrouped under the 'business' section.
_buslist = [
    'budget',
    'weekend gross',
    'gross',
    'opening weekend',
    'rentals',
    'admissions',
    'filming dates',
    'production dates',
    'studios',
    'copyright holder',
]
_busd = {entry: ('business', entry) for entry in _buslist}


def _reGroupDict(d, newgr):
    """Regroup keys in the d dictionary in subdictionaries, based on
    the scheme in the newgr dictionary.
    E.g.: in the newgr, an entry 'LD label': ('laserdisc', 'label')
    tells the _reGroupDict() function to take the entry with
    label 'LD label' (as received from the sql database)
    and put it in the subsection (another dictionary) named
    'laserdisc', using the key 'label'.

    Keys of d not listed in newgr are copied unchanged; d itself is
    not modified and a new dictionary is returned."""
    r = {}
    for k, v in d.items():
        # Direct dictionary lookup, instead of scanning a list of the keys
        # for every item.
        if k in newgr:
            target = newgr[k]
            r.setdefault(target[0], {})[target[1]] = v
        else:
            r[k] = v
    return r


def _groupListBy(l, index):
    """Split the items of a list in a list of lists, putting together
    the items sharing the same value at the given index."""
    groups = {}
    for entry in l:
        key = entry[index]
        if key in groups:
            groups[key].append(entry)
        else:
            groups[key] = [entry]
    return list(groups.values())


def sub_dict(d, keys):
    """Return a new dictionary with the items of 'd' whose key is listed
    in 'keys'; keys missing from 'd' are ignored."""
    selected = {}
    for key in keys:
        if key in d:
            selected[key] = d[key]
    return selected


def get_movie_data(movieID, kindDict, fromAka=0, _table=None):
    """Return a dictionary containing data about the given movieID;
    if fromAka is true, the AkaTitle table is searched; _table is
    reserved for the imdbpy2sql.py script.

    kindDict maps the kind IDs to the corresponding strings.
    The returned dictionary always has the 'title' and 'kind' keys;
    'year', 'imdbIndex', 'season', 'episode', 'series years' and
    'episode of' are present only when available.  An empty dictionary
    is returned (and a warning is logged) if the row can't be fetched."""
    if _table is not None:
        Table = _table
    elif not fromAka:
        Table = Title
    else:
        Table = AkaTitle
    try:
        m = Table.get(movieID)
    except Exception as e:
        # Logger.warn() is a deprecated alias, removed in Python 3.13.
        _aux_logger.warning('Unable to fetch information for movieID %s: %s', movieID, e)
        return {}
    mdict = {'title': m.title, 'kind': kindDict[m.kindID],
             'year': m.productionYear, 'imdbIndex': m.imdbIndex,
             'season': m.seasonNr, 'episode': m.episodeNr}
    if not fromAka:
        if m.seriesYears is not None:
            mdict['series years'] = str(m.seriesYears)
    if mdict['imdbIndex'] is None:
        del mdict['imdbIndex']
    if mdict['year'] is None:
        del mdict['year']
    else:
        try:
            mdict['year'] = int(mdict['year'])
        except (TypeError, ValueError):
            # A year that is not a number is useless: drop it.
            del mdict['year']
    for key in ('season', 'episode'):
        if mdict[key] is None:
            del mdict[key]
            continue
        try:
            mdict[key] = int(mdict[key])
        except (TypeError, ValueError, OverflowError):
            # Not a plain number: keep the value as it is in the database.
            pass
    episodeOfID = m.episodeOfID
    if episodeOfID is not None:
        ser_dict = get_movie_data(episodeOfID, kindDict, fromAka)
        mdict['episode of'] = Movie(data=ser_dict, movieID=episodeOfID,
                                    accessSystem='sql')
        if fromAka:
            ser_note = AkaTitle.get(episodeOfID).note
            if ser_note:
                mdict['episode of'].notes = ser_note
    return mdict


def _iterKeywords(results):
    """Lazily yield an (id, keyword) pair for every row of a selection
    of the Keyword table."""
    for row in results:
        pair = (row.id, row.keyword)
        yield pair


def getSingleInfo(table, movieID, infoType, notAList=False):
    """Retrieve a single set of information about a given movie, from
    the specified table.

    Return a dictionary in the form {infoType: listOfInfo}, or
    {infoType: firstInfo} if notAList is true; every info has its note,
    if any, appended after a '::' separator.  An empty dictionary is
    returned if the infoType is unknown or nothing is found."""
    matchingTypes = InfoType.select(InfoType.q.info == infoType)
    if matchingTypes.count() == 0:
        return {}
    rows = table.select(AND(table.q.movieID == movieID,
                            table.q.infoTypeID == matchingTypes[0].id))
    collected = []
    for row in rows:
        text = row.info
        note = row.note
        if note:
            text += '::%s' % note
        collected.append(text)
    if not collected:
        return {}
    if notAList:
        return {infoType: collected[0]}
    return {infoType: collected}


def _cmpTop(a, b, what='top 250 rank'):
    """Compare function used to sort top 250/bottom 10 rank: the rank is
    read (as an integer) from the 'what' key of the second item of a
    and b; return -1, 0 or 1."""
    left = int(a[1].get(what))
    right = int(b[1].get(what))
    if left > right:
        return 1
    if left < right:
        return -1
    return 0


def _cmpBottom(a, b):
    """Compare function used to sort the bottom 10 rank; same as
    _cmpTop(), applied to the 'bottom 10 rank' key."""
    rankKey = 'bottom 10 rank'
    return _cmpTop(a, b, rankKey)


class IMDbSqlAccessSystem(IMDbBase):

    """The class used to access IMDb's data through a SQL database."""

    accessSystem = 'sql'

    def __init__(self, uri, adultSearch=True, *arguments, **keywords):
        """Initialize the access system.

        uri is the URI of the database, handed to getDBTables() and
        setConnection(); adultSearch is passed to do_adult_search();
        any other argument is forwarded to IMDbBase.__init__().

        Raise IMDbError if the alchemyadapter module can't be imported
        and IMDbDataAccessError if the connection can't be established
        or the constants can't be read from the database."""
        IMDbBase.__init__(self, *arguments, **keywords)
        DB_TABLES = []
        try:
            from .alchemyadapter import getDBTables, NotFoundError, \
                setConnection, AND, OR, IN, \
                ISNULL, CONTAINSSTRING, toUTF8
            # XXX: look ma'... black magic!  It's used to make
            #      TableClasses and some functions accessible
            #      through the whole module.
            for k, v in [('NotFoundError', NotFoundError),
                         ('AND', AND), ('OR', OR), ('IN', IN),
                         ('ISNULL', ISNULL),
                         ('CONTAINSSTRING', CONTAINSSTRING)]:
                globals()[k] = v
            self.toUTF8 = toUTF8
            DB_TABLES = getDBTables(uri)
            for t in DB_TABLES:
                globals()[t._imdbpyName] = t
        except ImportError as e:
            # Keep the original error as the explicit cause.
            raise IMDbError('unable to import SQLAlchemy') from e
        # Set the connection to the database.
        logger.debug('connecting to %s', uri)
        try:
            self._connection = setConnection(uri, DB_TABLES)
        except AssertionError as e:
            raise IMDbDataAccessError(
                'unable to connect to the database server; ' +
                'complete message: "%s"' % str(e)) from e
        self.Error = self._connection.module.Error
        # Maps some IDs to the corresponding strings.
        self._kind = {}
        self._kindRev = {}
        logger.debug('reading constants from the database')
        try:
            for kt in KindType.select():
                self._kind[kt.id] = kt.kind
                self._kindRev[str(kt.kind)] = kt.id
        except self.Error:
            # NOTE: you can also get the error, but - at least with
            #       MySQL - it also contains the password, and I don't
            #       like the idea to print it out.  "from None" prevents
            #       the implicit exception chaining from showing it in
            #       the traceback.
            raise IMDbDataAccessError(
                'unable to connect to the database server') from None
        self._role = {}
        for rl in RoleType.select():
            self._role[rl.id] = str(rl.role)
        self._info = {}
        self._infoRev = {}
        # self._moviesubs is a dictionary used to rearrange the data
        # structure for a movie object; the 'LD something' information
        # go under the 'laserdisc' section, with the 'something' key.
        self._moviesubs = {}
        for inf in InfoType.select():
            self._info[inf.id] = str(inf.info)
            self._infoRev[str(inf.info)] = inf.id
            if inf.info.startswith('LD '):
                self._moviesubs[inf.info] = ('laserdisc', inf.info[3:])
        self._moviesubs.update(_litd)
        self._moviesubs.update(_busd)
        self._compType = {}
        for cType in CompanyType.select():
            self._compType[cType.id] = cType.kind
        self._compcast = {}
        for cc in CompCastType.select():
            self._compcast[cc.id] = str(cc.kind)
        self._link = {}
        for lt in LinkType.select():
            self._link[lt.id] = str(lt.link)
        self.do_adult_search(adultSearch)

    def _findRefs(self, o, trefs, nrefs):
        """Find titles or names references in strings."""
        if isinstance(o, str):
            for title in re_titleRef.findall(o):
                a_title = analyze_title(title, canonical=0)
                rtitle = build_title(a_title, ptdf=1)
                if rtitle in trefs:
                    continue
                movieID = self._getTitleID(rtitle)
                if movieID is None:
                    movieID = self._getTitleID(title)
                if movieID is None:
                    continue
                m = Movie(title=rtitle, movieID=movieID,
                          accessSystem=self.accessSystem)
                trefs[rtitle] = m
                rtitle2 = canonicalTitle(a_title.get('title', ''))
                if rtitle2 and rtitle2 != rtitle and rtitle2 != title:
                    trefs[rtitle2] = m
                if title != rtitle:
                    trefs[title] = m
            for name in re_nameRef.findall(o):
                a_name = analyze_name(name, canonical=1)
                rname = build_name(a_name, canonical=1)
                if rname in nrefs:
                    continue
                personID = self._getNameID(rname)
                if personID is None:
                    personID = self._getNameID(name)
                if personID is None:
                    continue
                p = Person(name=rname, personID=personID,
                           accessSystem=self.accessSystem)
                nrefs[rname] = p
                rname2 = normalizeName(a_name.get('name', ''))
                if rname2 and rname2 != rname:
                    nrefs[rname2] = p
                if name != rname and name != rname2:
                    nrefs[name] = p
        elif isinstance(o, (list, tuple)):
            for item in o:
                self._findRefs(item, trefs, nrefs)
        elif isinstance(o, dict):
            for value in list(o.values()):
                self._findRefs(value, trefs, nrefs)
        return trefs, nrefs

    def _extractRefs(self, o):
        """Scan for titles or names references in strings."""
        trefs = {}
        nrefs = {}
        try:
            return self._findRefs(o, trefs, nrefs)
        except RuntimeError as e:
            # Symbian/python 2.2 has a poor regexp implementation.
            import warnings
            warnings.warn('RuntimeError in '
                          "imdb.parser.sql.IMDbSqlAccessSystem; "
                          "if it's not a recursion limit exceeded and we're not "
                          "running in a Symbian environment, it's a bug:\n%s" % e)
            return trefs, nrefs

    def _changeAKAencoding(self, akanotes, akatitle):
        """Return akatitle in the correct charset, as specified in
        the akanotes field; if akatitle doesn't need to be modified,
        return None."""
        oti = akanotes.find('(original ')
        if oti == -1:
            return None
        ote = akanotes[oti + 10:].find(' title)')
        if ote != -1:
            cs_info = akanotes[oti + 10:oti + 10 + ote].lower().split()
            for e in cs_info:
                # excludes some strings that clearly are not encoding.
                if e in ('script', '', 'cyrillic', 'greek'):
                    continue
                if e.startswith('iso-') and e.find('latin') != -1:
                    e = e[4:].replace('-', '')
                try:
                    lookup(e)
                    lat1 = akatitle.encode('latin_1', 'replace')
                    return str(lat1, e, 'replace')
                except (LookupError, ValueError, TypeError):
                    continue
        return None

    def _buildNULLCondition(self, col, val):
        """Build a comparison for columns where values can be NULL."""
        if val is None:
            return ISNULL(col)
        else:
            if isinstance(val, int):
                return col == val
            else:
                return col == self.toUTF8(val)

    def _getTitleID(self, title):
        """Given a long imdb canonical title, returns a movieID or
        None if not found (or if more than one title matches)."""
        def sameTitle(info):
            """Conditions on title, imdbIndex, kind and year of info."""
            return (Title.q.title == self.toUTF8(info['title']),
                    self._buildNULLCondition(Title.q.imdbIndex,
                                             info.get('imdbIndex')),
                    Title.q.kindID == self._kindRev[info['kind']],
                    self._buildNULLCondition(Title.q.productionYear,
                                             info.get('year')))

        parsed = analyze_title(title)
        condition = None
        if parsed['kind'] == 'episode':
            # Restrict the search to the episodes of the matching series.
            series = parsed['episode of']
            seriesIDs = [row.id for row in
                         Title.select(AND(*sameTitle(series)))]
            if seriesIDs:
                condition = AND(IN(Title.q.episodeOfID, seriesIDs),
                                *sameTitle(parsed))
        if condition is None:
            condition = AND(*sameTitle(parsed))
        matches = Title.select(condition)
        try:
            if matches.count() != 1:
                return None
        except (UnicodeDecodeError, TypeError):
            return None
        return matches[0].id

    def _getNameID(self, name):
        """Given a long imdb canonical name, returns a personID or
        None if not found (or if more than one name matches)."""
        parsed = analyze_name(name)
        nameMatches = Name.q.name == self.toUTF8(parsed['name'])
        indexMatches = self._buildNULLCondition(Name.q.imdbIndex,
                                                parsed.get('imdbIndex'))
        found = Name.select(AND(nameMatches, indexMatches))
        try:
            isUnique = found.count() == 1
        except (UnicodeDecodeError, TypeError):
            return None
        if not isUnique:
            return None
        return found[0].id

    def _normalize_movieID(self, movieID):
        """Normalize the given movieID."""
        try:
            return int(movieID)
        except (ValueError, OverflowError):
            raise IMDbError('movieID "%s" can\'t be converted to integer' %
                            movieID)

    def _normalize_personID(self, personID):
        """Normalize the given personID."""
        try:
            return int(personID)
        except (ValueError, OverflowError):
            raise IMDbError('personID "%s" can\'t be converted to integer' %
                            personID)

    def _normalize_characterID(self, characterID):
        """Normalize the given characterID."""
        try:
            return int(characterID)
        except (ValueError, OverflowError):
            raise IMDbError('characterID "%s" can\'t be converted to integer'
                            % characterID)

    def _normalize_companyID(self, companyID):
        """Normalize the given companyID."""
        try:
            return int(companyID)
        except (ValueError, OverflowError):
            raise IMDbError('companyID "%s" can\'t be converted to integer'
                            % companyID)

    def get_imdbMovieID(self, movieID):
        """Translate a movieID in an imdbID.
        If not in the database, try an Exact Primary Title search on IMDb
        (through self.title2imdbID); return None if it's unable to get
        the imdbID.
        """
        try:
            movie = Title.get(movieID)
        except NotFoundError:
            return None
        imdbID = movie.imdbID
        if imdbID is not None:
            return '%07d' % imdbID
        m_dict = get_movie_data(movie.id, self._kind)
        titline = build_title(m_dict, ptdf=False)
        imdbID = self.title2imdbID(titline, m_dict['kind'])
        # If the imdbID was retrieved from the web and was not in the
        # database, update the database (ignoring errors, because it's
        # possible that the current user has not update privileges).
        if imdbID is not None and not isinstance(imdbID, list):
            try:
                movie.imdbID = int(imdbID)
            except Exception:
                # Best effort only; unlike a bare "except", this lets
                # KeyboardInterrupt and SystemExit through.
                pass
        return imdbID

    def get_imdbPersonID(self, personID):
        """Translate a personID in an imdbID.
        If not in the database, try an Exact Primary Name search on IMDb
        (through self.name2imdbID); return None if it's unable to get
        the imdbID.
        """
        try:
            person = Name.get(personID)
        except NotFoundError:
            return None
        imdbID = person.imdbID
        if imdbID is not None:
            return '%07d' % imdbID
        n_dict = {'name': person.name, 'imdbIndex': person.imdbIndex}
        namline = build_name(n_dict, canonical=False)
        imdbID = self.name2imdbID(namline)
        # Store the retrieved imdbID in the database, ignoring errors
        # (e.g.: the current user may not have update privileges).
        if imdbID is not None and not isinstance(imdbID, list):
            try:
                person.imdbID = int(imdbID)
            except Exception:
                # Best effort only; unlike a bare "except", this lets
                # KeyboardInterrupt and SystemExit through.
                pass
        return imdbID

    def get_imdbCharacterID(self, characterID):
        """Translate a characterID in an imdbID.
        If not in the database, try an Exact Primary Name search on IMDb
        (through self.character2imdbID); return None if it's unable to
        get the imdbID.
        """
        try:
            character = CharName.get(characterID)
        except NotFoundError:
            return None
        imdbID = character.imdbID
        if imdbID is not None:
            return '%07d' % imdbID
        n_dict = {'name': character.name, 'imdbIndex': character.imdbIndex}
        namline = build_name(n_dict, canonical=False)
        imdbID = self.character2imdbID(namline)
        # Store the retrieved imdbID in the database, ignoring errors
        # (e.g.: the current user may not have update privileges).
        if imdbID is not None and not isinstance(imdbID, list):
            try:
                character.imdbID = int(imdbID)
            except Exception:
                # Best effort only; unlike a bare "except", this lets
                # KeyboardInterrupt and SystemExit through.
                pass
        return imdbID

    def get_imdbCompanyID(self, companyID):
        """Translate a companyID in an imdbID.

        If the imdbID is stored in the database it is returned as a
        zero-padded, seven-digit string.  Otherwise the company's name
        (with its country code) is looked up through self.company2imdbID
        and its result is returned as-is.

        Returns None if the companyID is not in the database or the
        imdbID cannot be obtained.
        """
        try:
            company = CompanyName.get(companyID)
        except NotFoundError:
            return None
        imdbID = company.imdbID
        if imdbID is not None:
            return '%07d' % imdbID
        n_dict = {'name': company.name, 'country': company.countryCode}
        namline = build_company_name(n_dict)
        imdbID = self.company2imdbID(namline)
        if imdbID is not None and not isinstance(imdbID, list):
            # Best-effort: remember the ID on the database row.  The write
            # may fail (e.g. the current user may lack update privileges),
            # so ordinary errors are ignored; KeyboardInterrupt and
            # SystemExit are deliberately left free to propagate.
            try:
                company.imdbID = int(imdbID)
            except Exception:
                pass
        return imdbID

    def do_adult_search(self, doAdult):
        """If set to 0 or False, movies in the Adult category are not
        shown in the results of a search."""
        self.doAdult = doAdult

    def _search_movie(self, title, results, _episodes=False):
        title = title.strip()
        if not title:
            return []
        title_dict = analyze_title(title, canonical=1)
        s_title = title_dict['title']
        if not s_title:
            return []
        episodeOf = title_dict.get('episode of')
        if episodeOf:
            _episodes = False
        s_title_split = s_title.split(', ')
        if len(s_title_split) > 1 and \
                s_title_split[-1].lower() in _unicodeArticles:
            s_title_rebuilt = ', '.join(s_title_split[:-1])
            if s_title_rebuilt:
                s_title = s_title_rebuilt

        soundexCode = soundex(s_title)

        # XXX: improve the search restricting the kindID if the
        #      "kind" of the input differs from "movie"?
        condition = conditionAka = None
        if _episodes:
            condition = AND(Title.q.phoneticCode == soundexCode,
                            Title.q.kindID == self._kindRev['episode'])
            conditionAka = AND(AkaTitle.q.phoneticCode == soundexCode,
                               AkaTitle.q.kindID == self._kindRev['episode'])
        elif title_dict['kind'] == 'episode' and episodeOf is not None:
            # set canonical=0 ?  Should not make much difference.
            series_title = build_title(episodeOf, canonical=1)
            # XXX: is it safe to get "results" results?
            #      Too many?  Too few?
            serRes = results
            if serRes < 3 or serRes > 10:
                serRes = 10
            searchSeries = self._search_movie(series_title, serRes)
            seriesIDs = [result[0] for result in searchSeries]
            if seriesIDs:
                condition = AND(Title.q.phoneticCode == soundexCode,
                                IN(Title.q.episodeOfID, seriesIDs),
                                Title.q.kindID == self._kindRev['episode'])
                conditionAka = AND(AkaTitle.q.phoneticCode == soundexCode,
                                   IN(AkaTitle.q.episodeOfID, seriesIDs),
                                   AkaTitle.q.kindID == self._kindRev['episode'])
            else:
                # XXX: bad situation: we have found no matching series;
                #      try searching everything (both episodes and
                #      non-episodes) for the title.
                condition = AND(Title.q.phoneticCode == soundexCode,
                                IN(Title.q.episodeOfID, seriesIDs))
                conditionAka = AND(AkaTitle.q.phoneticCode == soundexCode,
                                   IN(AkaTitle.q.episodeOfID, seriesIDs))
        if condition is None:
            # XXX: excludes episodes?
            condition = AND(Title.q.kindID != self._kindRev['episode'],
                            Title.q.phoneticCode == soundexCode)
            conditionAka = AND(AkaTitle.q.kindID != self._kindRev['episode'],
                               AkaTitle.q.phoneticCode == soundexCode)

        # Up to 3 variations of the title are searched, plus the
        # long imdb canonical title, if provided.
        if not _episodes:
            title1, title2, title3 = titleVariations(title)
        else:
            title1 = title
            title2 = ''
            title3 = ''
        try:
            qr = [(q.id, get_movie_data(q.id, self._kind))
                  for q in Title.select(condition)]
            q2 = [(q.movieID, get_movie_data(q.id, self._kind, fromAka=1))
                  for q in AkaTitle.select(conditionAka)]
            qr += q2
        except NotFoundError as e:
            raise IMDbDataAccessError(
                'unable to search the database: "%s"' % str(e))

        resultsST = results * 3
        res = scan_titles(qr, title1, title2, title3, resultsST,
                          searchingEpisode=episodeOf is not None,
                          onlyEpisodes=_episodes,
                          ro_thresold=0.0)
        res[:] = [x[1] for x in res]

        if res and not self.doAdult:
            mids = [x[0] for x in res]
            genreID = self._infoRev['genres']
            adultlist = [al.movieID for al
                         in MovieInfo.select(
                         AND(MovieInfo.q.infoTypeID == genreID,
                             MovieInfo.q.info == 'Adult',
                             IN(MovieInfo.q.movieID, mids)))]
            res[:] = [x for x in res if x[0] not in adultlist]

        new_res = []
        # XXX: can there be duplicates?
        for r in res:
            if r not in q2:
                new_res.append(r)
                continue
            mdict = r[1]
            aka_title = build_title(mdict, ptdf=1)
            orig_dict = get_movie_data(r[0], self._kind)
            orig_title = build_title(orig_dict, ptdf=1)
            if aka_title == orig_title:
                new_res.append(r)
                continue
            orig_dict['akas'] = [aka_title]
            new_res.append((r[0], orig_dict))
        if results > 0:
            new_res[:] = new_res[:results]
        return new_res

    def _search_movie_advanced(self, title=None, adult=None, results=None, sort=None, sort_dir=None):
        return self._search_movie(title, results)

    def _search_episode(self, title, results):
        return self._search_movie(title, results, _episodes=True)

    def get_movie_main(self, movieID):
        """Retrieve every piece of information stored about a movie.

        Collects, in order: the basic title data, cast and crew, generic
        movie info and keywords, companies, AKA titles, complete
        cast/crew status, movie connections and episodes (for titles
        that have any); then normalizes some keys to preserve
        consistency with the other data access systems.

        Returns a dictionary with the keys 'data' (the collected
        information), 'titlesRefs', 'namesRefs' and 'info sets'.

        Raises IMDbDataAccessError if the movie cannot be retrieved.
        """
        # Every movie information is retrieved from here.
        infosets = self.get_movie_infoset()
        try:
            res = get_movie_data(movieID, self._kind)
        except NotFoundError as e:
            raise IMDbDataAccessError(
                'unable to get movieID "%s": "%s"' % (movieID, str(e)))
        if not res:
            raise IMDbDataAccessError('unable to get movieID "%s"' % movieID)
        # Collect cast information.
        # Every entry is a list of:
        # [personID, personRoleID, note, nrOrder, role, name, imdbIndex]
        # (the last two items are appended in the loop below).
        castdata = [[cd.personID, cd.personRoleID, cd.note, cd.nrOrder,
                    self._role[cd.roleID]]
                    for cd in CastInfo.select(CastInfo.q.movieID == movieID)]
        for p in castdata:
            person = Name.get(p[0])
            p += [person.name, person.imdbIndex]
            # Actors and actresses are merged in a single 'cast' section.
            if p[4] in ('actor', 'actress'):
                p[4] = 'cast'
        # Regroup by role/duty (cast, writer, director, ...)
        castdata[:] = _groupListBy(castdata, 4)
        for group in castdata:
            duty = group[0][4]
            for pdata in group:
                curRole = pdata[1]
                curRoleID = None
                if curRole is not None:
                    robj = CharName.get(curRole)
                    curRole = robj.name
                    curRoleID = robj.id
                p = Person(personID=pdata[0], name=pdata[5],
                           currentRole=curRole or '',
                           roleID=curRoleID,
                           notes=pdata[2] or '',
                           accessSystem='sql')
                if pdata[6]:
                    p['imdbIndex'] = pdata[6]
                p.billingPos = pdata[3]
                res.setdefault(duty, []).append(p)
            if duty == 'cast':
                res[duty] = merge_roles(res[duty])
            res[duty].sort()
        # Info about the movie.
        minfo = [(self._info[m.infoTypeID], m.info, m.note)
                 for m in MovieInfo.select(MovieInfo.q.movieID == movieID)]
        minfo += [('keywords', Keyword.get(m.keywordID).keyword, None)
                  for m in MovieKeyword.select(MovieKeyword.q.movieID == movieID)]
        minfo = _groupListBy(minfo, 0)
        for group in minfo:
            sect = group[0][0]
            for mdata in group:
                data = mdata[1]
                # Notes are joined to the information with '::'.
                if mdata[2]:
                    data += '::%s' % mdata[2]
                res.setdefault(sect, []).append(data)
        # Companies info about a movie.
        cinfo = [(self._compType[m.companyTypeID], m.companyID, m.note) for m
                 in MovieCompanies.select(MovieCompanies.q.movieID == movieID)]
        cinfo = _groupListBy(cinfo, 0)
        for group in cinfo:
            sect = group[0][0]
            for mdata in group:
                cDb = CompanyName.get(mdata[1])
                cDbTxt = cDb.name
                if cDb.countryCode:
                    cDbTxt += ' %s' % cDb.countryCode
                company = Company(name=cDbTxt,
                                  companyID=mdata[1],
                                  notes=mdata[2] or '',
                                  accessSystem=self.accessSystem)
                res.setdefault(sect, []).append(company)
        # AKA titles.
        akat = [(get_movie_data(at.id, self._kind, fromAka=1), at.note)
                for at in AkaTitle.select(AkaTitle.q.movieID == movieID)]
        if akat:
            res['akas'] = []
            for td, note in akat:
                nt = build_title(td, ptdf=1)
                if note:
                    net = self._changeAKAencoding(note, nt)
                    if net is not None:
                        nt = net
                    nt += '::%s' % note
                if nt not in res['akas']:
                    res['akas'].append(nt)
        # Complete cast/crew.
        compcast = [(self._compcast[cc.subjectID], self._compcast[cc.statusID])
                    for cc in CompleteCast.select(CompleteCast.q.movieID == movieID)]
        if compcast:
            for entry in compcast:
                val = str(entry[1])
                res['complete %s' % entry[0]] = val
        # Movie connections.
        mlinks = [[ml.linkedMovieID, self._link[ml.linkTypeID]]
                  for ml in MovieLink.select(MovieLink.q.movieID == movieID)]
        if mlinks:
            for ml in mlinks:
                lmovieData = get_movie_data(ml[0], self._kind)
                if lmovieData:
                    m = Movie(movieID=ml[0], data=lmovieData, accessSystem='sql')
                    ml[0] = m
            res['connections'] = {}
            mlinks[:] = _groupListBy(mlinks, 1)
            for group in mlinks:
                lt = group[0][1]
                res['connections'][lt] = [i[0] for i in group]
        # Episodes.
        episodes = {}
        eps_list = list(Title.select(Title.q.episodeOfID == movieID))
        eps_list.sort(key=lambda e: '%s.%s' % (e.seasonNr or '', e.episodeNr or ''))
        if eps_list:
            ps_data = {'title': res['title'], 'kind': res['kind'],
                       'year': res.get('year'),
                       'imdbIndex': res.get('imdbIndex')}
            parentSeries = Movie(movieID=movieID, data=ps_data,
                                 accessSystem='sql')
            for episode in eps_list:
                episodeID = episode.id
                episode_data = get_movie_data(episodeID, self._kind)
                m = Movie(movieID=episodeID, data=episode_data,
                          accessSystem='sql')
                m['episode of'] = parentSeries
                season = episode_data.get('season', 'UNKNOWN')
                if season not in episodes:
                    episodes[season] = {}
                ep_number = episode_data.get('episode')
                if ep_number is None:
                    # No episode number: use the first one after the
                    # highest number seen so far in this season.
                    ep_number = max((list(episodes[season].keys()) or [0])) + 1
                episodes[season][ep_number] = m
            res['episodes'] = episodes
            res['number of episodes'] = sum([len(x) for x in list(episodes.values())])
            res['number of seasons'] = len(list(episodes.keys()))
        # Regroup laserdisc information.
        res = _reGroupDict(res, self._moviesubs)
        # Do some transformation to preserve consistency with other
        # data access systems.
        if 'quotes' in res:
            for idx, quote in enumerate(res['quotes']):
                res['quotes'][idx] = quote.split('::')
        if 'runtimes' in res and len(res['runtimes']) > 0:
            rt = res['runtimes'][0]
            # Strip what re_episodes matches from the first runtime.
            if re_episodes.findall(rt):
                res['runtimes'][0] = re_episodes.sub('', rt)
                if res['runtimes'][0][-2:] == '::':
                    res['runtimes'][0] = res['runtimes'][0][:-2]
        if 'votes' in res:
            res['votes'] = int(res['votes'][0])
        if 'rating' in res:
            res['rating'] = float(res['rating'][0])
        if 'votes distribution' in res:
            res['votes distribution'] = res['votes distribution'][0]
        if 'mpaa' in res:
            res['mpaa'] = res['mpaa'][0]

        def _rank_to_int(value):
            """Convert a rank, possibly wrapped in a list, to an integer."""
            # Movie info values are gathered in lists (see the loop over
            # minfo above): calling int() on the list itself can only
            # fail, so the first item is converted instead.
            if isinstance(value, (list, tuple)):
                value = value[0]
            return int(value)

        if 'top 250 rank' in res:
            try:
                res['top 250 rank'] = _rank_to_int(res['top 250 rank'])
            except (TypeError, ValueError, IndexError):
                # Leave the value untouched if it's not a number.
                pass
        if 'bottom 10 rank' in res:
            # The key is renamed to the one used by the other data
            # access systems.
            try:
                res['bottom 100 rank'] = _rank_to_int(res['bottom 10 rank'])
            except (TypeError, ValueError, IndexError):
                pass
            del res['bottom 10 rank']
        for old, new in [('guest', 'guests'), ('trademarks', 'trade-mark'),
                         ('articles', 'article'), ('pictorials', 'pictorial'),
                         ('magazine-covers', 'magazine-cover-photo')]:
            if old in res:
                res[new] = res[old]
                del res[old]
        trefs, nrefs = self._extractRefs(sub_dict(res, Movie.keys_tomodify_list))
        return {'data': res, 'titlesRefs': trefs, 'namesRefs': nrefs,
                'info sets': infosets}

    # Every movie info set is an alias of get_movie_main, which collects
    # all the data at once; the names are listed just to let callers know
    # what kinds of information are available.
    get_movie_alternate_versions = get_movie_main
    get_movie_business = get_movie_main
    get_movie_connections = get_movie_main
    get_movie_crazy_credits = get_movie_main
    get_movie_goofs = get_movie_main
    get_movie_keywords = get_movie_main
    get_movie_literature = get_movie_main
    get_movie_locations = get_movie_main
    get_movie_plot = get_movie_main
    get_movie_quotes = get_movie_main
    get_movie_release_dates = get_movie_main
    get_movie_soundtrack = get_movie_main
    get_movie_taglines = get_movie_main
    get_movie_technical = get_movie_main
    get_movie_trivia = get_movie_main
    get_movie_vote_details = get_movie_main
    get_movie_episodes = get_movie_main

    def _search_person(self, name, results):
        name = name.strip()
        if not name:
            return []
        s_name = analyze_name(name)['name']
        if not s_name:
            return []
        soundexCode = soundex(s_name)
        name1, name2, name3 = nameVariations(name)

        # If the soundex is None, compare only with the first
        # phoneticCode column.
        if soundexCode is not None:
            condition = IN(soundexCode, [Name.q.namePcodeCf,
                                         Name.q.namePcodeNf,
                                         Name.q.surnamePcode])
            conditionAka = IN(soundexCode, [AkaName.q.namePcodeCf,
                                            AkaName.q.namePcodeNf,
                                            AkaName.q.surnamePcode])
        else:
            condition = ISNULL(Name.q.namePcodeCf)
            conditionAka = ISNULL(AkaName.q.namePcodeCf)

        try:
            qr = [(q.id, {'name': q.name, 'imdbIndex': q.imdbIndex})
                  for q in Name.select(condition)]

            q2 = [(q.personID, {'name': q.name, 'imdbIndex': q.imdbIndex})
                  for q in AkaName.select(conditionAka)]
            qr += q2
        except NotFoundError as e:
            raise IMDbDataAccessError(
                'unable to search the database: "%s"' % str(e))

        res = scan_names(qr, name1, name2, name3, results)
        res[:] = [x[1] for x in res]
        # Purge empty imdbIndex.
        returnl = []
        for x in res:
            tmpd = x[1]
            if tmpd['imdbIndex'] is None:
                del tmpd['imdbIndex']
            returnl.append((x[0], tmpd))

        new_res = []
        # XXX: can there be duplicates?
        for r in returnl:
            if r not in q2:
                new_res.append(r)
                continue
            pdict = r[1]
            aka_name = build_name(pdict, canonical=1)
            p = Name.get(r[0])
            orig_dict = {'name': p.name, 'imdbIndex': p.imdbIndex}
            if orig_dict['imdbIndex'] is None:
                del orig_dict['imdbIndex']
            orig_name = build_name(orig_dict, canonical=1)
            if aka_name == orig_name:
                new_res.append(r)
                continue
            orig_dict['akas'] = [aka_name]
            new_res.append((r[0], orig_dict))
        if results > 0:
            new_res[:] = new_res[:results]

        return new_res

    def get_person_main(self, personID):
        """Retrieve every piece of information stored about a person.

        Collects the filmography (grouped by duty, with episodes kept
        apart under the 'episodes' key), the generic information about
        the person and the AKA names; then normalizes some keys to
        preserve consistency with the other data access systems.

        Returns a dictionary with the keys 'data' (the collected
        information), 'titlesRefs', 'namesRefs' and 'info sets'.

        Raises IMDbDataAccessError if the person cannot be retrieved.
        """
        # Every person information is retrieved from here.
        infosets = self.get_person_infoset()
        try:
            p = Name.get(personID)
        except NotFoundError as e:
            raise IMDbDataAccessError(
                'unable to get personID "%s": "%s"' % (personID, str(e)))
        res = {'name': p.name, 'imdbIndex': p.imdbIndex}
        if res['imdbIndex'] is None:
            del res['imdbIndex']
        if not res:
            raise IMDbDataAccessError('unable to get personID "%s"' % personID)
        # Collect cast information.
        # Every entry is a tuple of:
        # (movieID, personRoleID, note, role, movie data)
        castdata = [(cd.movieID, cd.personRoleID, cd.note,
                    self._role[cd.roleID],
                    get_movie_data(cd.movieID, self._kind))
                    for cd in CastInfo.select(CastInfo.q.personID == personID)]
        # Regroup by role/duty (cast, writer, director, ...)
        castdata[:] = _groupListBy(castdata, 3)
        episodes = {}
        seenDuties = []
        for group in castdata:
            for mdata in group:
                duty = orig_duty = group[0][3]
                if duty not in seenDuties:
                    seenDuties.append(orig_duty)
                note = mdata[2] or ''
                if 'episode of' in mdata[4]:
                    # Episodes are collected apart; for non-acting duties
                    # the original duty is kept at the start of the note.
                    duty = 'episodes'
                    if orig_duty not in ('actor', 'actress'):
                        if note:
                            note = ' %s' % note
                        note = '[%s]%s' % (orig_duty, note)
                curRole = mdata[1]
                curRoleID = None
                if curRole is not None:
                    robj = CharName.get(curRole)
                    curRole = robj.name
                    curRoleID = robj.id
                m = Movie(movieID=mdata[0], data=mdata[4],
                          currentRole=curRole or '',
                          roleID=curRoleID,
                          notes=note, accessSystem='sql')
                if duty != 'episodes':
                    res.setdefault(duty, []).append(m)
                else:
                    # Episodes are grouped by the series they belong to.
                    episodes.setdefault(m['episode of'], []).append(m)
        if episodes:
            for k in episodes:
                episodes[k].sort()
                episodes[k].reverse()
            res['episodes'] = episodes
        for duty in seenDuties:
            if duty in res:
                if duty in ('actor', 'actress', 'himself', 'herself',
                            'themselves'):
                    res[duty] = merge_roles(res[duty])
                res[duty].sort()
        # Info about the person.
        pinfo = [(self._info[pi.infoTypeID], pi.info, pi.note)
                 for pi in PersonInfo.select(PersonInfo.q.personID == personID)]
        # Regroup by duty.
        pinfo = _groupListBy(pinfo, 0)
        for group in pinfo:
            sect = group[0][0]
            for pdata in group:
                data = pdata[1]
                # Notes are joined to the information with '::'.
                if pdata[2]:
                    data += '::%s' % pdata[2]
                res.setdefault(sect, []).append(data)
        # AKA names.
        akan = [(an.name, an.imdbIndex)
                for an in AkaName.select(AkaName.q.personID == personID)]
        if akan:
            res['akas'] = []
            for n in akan:
                nd = {'name': n[0]}
                if n[1]:
                    nd['imdbIndex'] = n[1]
                nt = build_name(nd, canonical=1)
                res['akas'].append(nt)
        # Do some transformation to preserve consistency with other
        # data access systems.
        for key in ('birth date', 'birth notes', 'death date', 'death notes',
                    'birth name', 'height'):
            if key in res:
                res[key] = res[key][0]
        if 'guest' in res:
            res['notable tv guest appearances'] = res['guest']
            del res['guest']
        # Names that must not be repeated amongst the AKAs.  Work on a
        # copy: appending the birth name to the list stored in res would
        # add it to the returned 'nick names' too.
        miscnames = list(res.get('nick names', []))
        if 'birth name' in res:
            miscnames.append(res['birth name'])
        if 'akas' in res:
            for mname in miscnames:
                if mname in res['akas']:
                    res['akas'].remove(mname)
            if not res['akas']:
                del res['akas']
        trefs, nrefs = self._extractRefs(sub_dict(res, Person.keys_tomodify_list))
        return {'data': res, 'titlesRefs': trefs, 'namesRefs': nrefs,
                'info sets': infosets}

    # Every person info set is an alias of get_person_main, which collects
    # all the data at once; the names are listed just to let callers know
    # what kinds of information are available.
    get_person_filmography = get_person_main
    get_person_biography = get_person_main
    get_person_other_works = get_person_main
    get_person_episodes = get_person_main

    def _search_character(self, name, results):
        name = name.strip()
        if not name:
            return []
        s_name = analyze_name(name)['name']
        if not s_name:
            return []
        s_name = normalizeName(s_name)
        soundexCode = soundex(s_name)
        surname = s_name.split(' ')[-1]
        surnameSoundex = soundex(surname)
        name2 = ''
        soundexName2 = None
        nsplit = s_name.split()
        if len(nsplit) > 1:
            name2 = '%s %s' % (nsplit[-1], ' '.join(nsplit[:-1]))
            if s_name == name2:
                name2 = ''
            else:
                soundexName2 = soundex(name2)
        # If the soundex is None, compare only with the first
        # phoneticCode column.
        if soundexCode is not None:
            if soundexName2 is not None:
                condition = OR(surnameSoundex == CharName.q.surnamePcode,
                               IN(CharName.q.namePcodeNf, [soundexCode,
                                                           soundexName2]),
                               IN(CharName.q.surnamePcode, [soundexCode,
                                                            soundexName2]))
            else:
                condition = OR(surnameSoundex == CharName.q.surnamePcode,
                               IN(soundexCode, [CharName.q.namePcodeNf,
                                                CharName.q.surnamePcode]))
        else:
            condition = ISNULL(Name.q.namePcodeNf)
        try:
            qr = [(q.id, {'name': q.name, 'imdbIndex': q.imdbIndex})
                  for q in CharName.select(condition)]
        except NotFoundError as e:
            raise IMDbDataAccessError(
                'unable to search the database: "%s"' % str(e))
        res = scan_names(qr, s_name, name2, '', results,
                         _scan_character=True)
        res[:] = [x[1] for x in res]
        # Purge empty imdbIndex.
        returnl = []
        for x in res:
            tmpd = x[1]
            if tmpd['imdbIndex'] is None:
                del tmpd['imdbIndex']
            returnl.append((x[0], tmpd))
        return returnl

    def get_character_main(self, characterID, results=1000):
        """Retrieve the information stored about a character.

        The filmography lists the movies where the character was played
        by an actor or an actress; at most `results` cast entries are
        read from the database when that value is greater than zero.

        Returns a dictionary with the keys 'data' and 'info sets'.

        Raises IMDbDataAccessError if the character cannot be retrieved.
        """
        # Every character information is retrieved from here.
        infosets = self.get_character_infoset()
        try:
            character = CharName.get(characterID)
        except NotFoundError as e:
            raise IMDbDataAccessError(
                'unable to get characterID "%s": "%s"' % (characterID, e))
        res = {'name': character.name, 'imdbIndex': character.imdbIndex}
        if res['imdbIndex'] is None:
            del res['imdbIndex']
        if not res:
            raise IMDbDataAccessError('unable to get characterID "%s"' %
                                      characterID)
        # Collect filmography information.
        cast_rows = CastInfo.select(CastInfo.q.personRoleID == characterID)
        if results > 0:
            cast_rows = cast_rows[:results]
        acting = ('actor', 'actress')
        filmodata = []
        for row in cast_rows:
            if self._role[row.roleID] not in acting:
                continue
            filmodata.append((row.movieID, row.personID, row.note,
                              get_movie_data(row.movieID, self._kind)))
        # Here the "role" attached to every movie is the person who
        # played the character.
        films = []
        for film_id, person_id, film_note, film_data in filmodata:
            person_name = None
            if person_id is not None:
                person_name = Name.get(person_id).name
            films.append(Movie(movieID=film_id, data=film_data,
                               currentRole=person_name or '',
                               roleID=person_id, roleIsPerson=True,
                               notes=film_note or '', accessSystem='sql'))
        films = merge_roles(films)
        films.sort()
        if films:
            res['filmography'] = films
        return {'data': res, 'info sets': infosets}

    # Every character info set is an alias of get_character_main, which
    # collects all the data at once.
    get_character_filmography = get_character_main
    get_character_biography = get_character_main

    def _search_company(self, name, results):
        name = name.strip()
        if not name:
            return []
        soundexCode = soundex(name)
        # If the soundex is None, compare only with the first
        # phoneticCode column.
        if soundexCode is None:
            condition = ISNULL(CompanyName.q.namePcodeNf)
        else:
            if name.endswith(']'):
                condition = CompanyName.q.namePcodeSf == soundexCode
            else:
                condition = CompanyName.q.namePcodeNf == soundexCode
        try:
            qr = [(q.id, {'name': q.name, 'country': q.countryCode})
                  for q in CompanyName.select(condition)]
        except NotFoundError as e:
            raise IMDbDataAccessError(
                'unable to search the database: "%s"' % str(e))
        qr[:] = [(x[0], build_company_name(x[1])) for x in qr]
        res = scan_company_names(qr, name, results)
        res[:] = [x[1] for x in res]
        # Purge empty country keys.
        returnl = []
        for x in res:
            tmpd = x[1]
            country = tmpd.get('country')
            if country is None and 'country' in tmpd:
                del tmpd['country']
            returnl.append((x[0], tmpd))
        return returnl

    def get_company_main(self, companyID, results=0):
        """Retrieve the information stored about a company.

        The movies the company is related to are grouped by company
        type; at most `results` entries are read from the database when
        that value is greater than zero.

        Returns a dictionary with the keys 'data' and 'info sets'.

        Raises IMDbDataAccessError if the company cannot be retrieved.
        """
        # Every company information is retrieved from here.
        infosets = self.get_company_infoset()
        try:
            company = CompanyName.get(companyID)
        except NotFoundError as e:
            raise IMDbDataAccessError(
                'unable to get companyID "%s": "%s"' % (companyID, e))
        res = {'name': company.name, 'country': company.countryCode}
        if res['country'] is None:
            del res['country']
        if not res:
            raise IMDbDataAccessError('unable to get companyID "%s"' %
                                      companyID)
        # Collect filmography information.
        rows = MovieCompanies.select(MovieCompanies.q.companyID == companyID)
        if results > 0:
            rows = rows[:results]
        filmodata = []
        for row in rows:
            filmodata.append((row.movieID, row.companyID,
                              self._compType[row.companyTypeID], row.note,
                              get_movie_data(row.movieID, self._kind)))
        # Regroup by company type.
        for group in _groupListBy(filmodata, 2):
            section = group[0][2]
            for film_id, _company_id, section, film_note, film_data in group:
                film = Movie(data=film_data, movieID=film_id,
                             notes=film_note or '',
                             accessSystem=self.accessSystem)
                res.setdefault(section, []).append(film)
            res.get(section, []).sort()
        return {'data': res, 'info sets': infosets}

    def _search_keyword(self, keyword, results):
        """Return at most `results` keywords similar to the given one.

        A keyword is a candidate if its phonetic code matches or if it
        contains the given string; the candidates are then filtered by
        filterSimilarKeywords.
        """
        same_sound = Keyword.q.phoneticCode == soundex(keyword)
        same_text = CONTAINSSTRING(Keyword.q.keyword, self.toUTF8(keyword))
        candidates = Keyword.select(OR(same_sound, same_text))
        similar = filterSimilarKeywords(keyword, _iterKeywords(candidates))
        return similar[:results]

    def _get_keyword(self, keyword, results, page=None):
        """Return the movies tagged with exactly the given keyword.

        The result is a list of at most `results` (movieID, movie data)
        tuples; it is empty if the keyword is not in the database.
        The page argument is not used.
        """
        matching = Keyword.select(Keyword.q.keyword == keyword)
        if matching.count() == 0:
            return []
        keyword_id = matching[0].id
        tagged = MovieKeyword.select(MovieKeyword.q.keywordID == keyword_id)
        found = []
        for row in tagged[:results]:
            found.append((row.movieID,
                          get_movie_data(row.movieID, self._kind)))
        return found

    def _get_top_bottom_movies(self, kind):
        if kind == 'top':
            kind = 'top 250 rank'
        elif kind == 'bottom':
            # Not a refuse: the plain text data files contains only
            # the bottom 10 movies.
            kind = 'bottom 10 rank'
        else:
            return []
        infoID = InfoType.select(InfoType.q.info == kind)
        if infoID.count() == 0:
            return []
        infoID = infoID[0].id
        movies = MovieInfo.select(MovieInfo.q.infoTypeID == infoID)
        ml = []
        for m in movies:
            minfo = get_movie_data(m.movieID, self._kind)
            for k in kind, 'votes', 'rating', 'votes distribution':
                valueDict = getSingleInfo(MovieInfo, m.movieID,
                                          k, notAList=True)
                if k in (kind, 'votes') and k in valueDict:
                    valueDict[k] = int(valueDict[k])
                elif k == 'rating' and k in valueDict:
                    valueDict[k] = float(valueDict[k])
                minfo.update(valueDict)
            ml.append((m.movieID, minfo))
        sorter = (_cmpBottom, _cmpTop)[kind == 'top 250 rank']
        ml.sort(sorter)
        return ml

    def __del__(self):
        """Close the connection to the database (currently disabled)."""
        # TODO: on Python 3, using mysql+pymysql, raises random exceptions;
        # for now, skip it and hope it's garbage-collected.
        # NOTE: the unconditional return below deliberately makes the rest
        # of the method unreachable; the code is kept for when the
        # problem above is solved.
        return
        if not hasattr(self, '_connection'):
            return
        logger.debug('closing connection to the database')
        try:
            self._connection.close()
        except:
            pass