# veekun_pokedex/pokedex/db/__init__.py
# encoding: utf-8
import re

from sqlalchemy import engine_from_config, orm

from hashlib import md5

from ..defaults import get_default_db_uri
from .tables import Language, metadata
from .multilang import MultilangSession, MultilangScopedSession

ENGLISH_ID = 9


def _append_query_arg(uri, arg):
    """Return ``uri`` with the ``key=value`` string ``arg`` added to its query.

    ``&`` is used as the separator when ``uri`` already contains a ``?``;
    otherwise ``?`` starts a new query string.
    """
    separator = '&' if '?' in uri else '?'
    return uri + separator + arg


def connect(uri=None, session_args=None, engine_args=None, engine_prefix=''):
    """Connects to the requested URI.  Returns a session object.

    With the URI omitted, attempts to connect to a default SQLite database
    contained within the package directory.

    Calling this function also binds the metadata object to the created engine.

    Arguments:
    `uri` -- database URI.  When None, the ``engine_prefix + 'url'`` entry of
        `engine_args` is used, and failing that `get_default_db_uri()`.
    `session_args` -- optional mapping of extra keyword arguments for the
        sessionmaker; these override the autoflush/autocommit/bind defaults.
    `engine_args` -- optional mapping handed to `engine_from_config()`.
    `engine_prefix` -- prefix of the keys in `engine_args`.

    Neither mapping is modified; this function works on private copies.
    """

    # Copy the mappings.  The URL is written into engine_args further down,
    # and doing that on a shared default (or on the caller's dict) would make
    # one call's URL leak into the next call that omits `uri`.
    session_args = dict(session_args or {})
    engine_args = dict(engine_args or {})

    # If we didn't get a uri, fall back to the default
    if uri is None:
        uri = engine_args.get(engine_prefix + 'url', None)
    if uri is None:
        uri = get_default_db_uri()

    ### Do some fixery for MySQL
    if uri.startswith('mysql:'):
        # MySQL uses latin1 for connections by default even if the server is
        # otherwise oozing with utf8; charset fixes this
        if 'charset' not in uri:
            uri = _append_query_arg(uri, 'charset=utf8')

        # Tables should be InnoDB, in the event that we're creating them, and
        # use UTF-8 goddammit!
        for table in metadata.tables.values():
            table.kwargs['mysql_engine'] = 'InnoDB'
            table.kwargs['mysql_charset'] = 'utf8'

    ### Do some fixery for Oracle
    if uri.startswith(('oracle:', 'oracle+cx_oracle:')):
        # Oracle requires auto_setinputsizes=False (or at least a special
        # set of exclusions from it, which I don't know)
        if 'auto_setinputsizes' not in uri:
            uri = _append_query_arg(uri, 'auto_setinputsizes=FALSE')

        # Shorten table names, Oracle limits table and column names to 30 chars
        # Easy solution: drop the vowels, different words are unlikely to
        # end up the same after the vowels are gone
        for table in metadata.tables.values():
            # Record the untouched name only once, so that calling connect()
            # again does not replace it with the already-shortened name.
            # (The attribute spelling is kept as-is; other code may read it.)
            if not hasattr(table, '_orginal_name'):
                table._orginal_name = table.name[:]
            if len(table.name) > 30:
                for letter in 'aeiouy':
                    table.name = table.name.replace(letter, '')
            # Aggressive renaming if the length is still too long:
            # Take the initials of the table, add a hash to make a new name
            if len(table.name) > 30:
                original = table._orginal_name
                # md5() needs bytes; encode text names before hashing
                if not isinstance(original, bytes):
                    original = original.encode('utf-8')
                hashedname = md5(original).hexdigest()
                shortname = ''.join(word[:1] for word in table.name.split('_'))
                table.name = (shortname + hashedname)[:30]

    ### Connect
    engine_args[engine_prefix + 'url'] = uri
    engine = engine_from_config(engine_args, prefix=engine_prefix)
    # NOTE(review): the connection is not used afterwards; presumably it is
    # opened so that an unreachable database fails here -- confirm before
    # removing.
    conn = engine.connect()
    metadata.bind = engine

    # Caller-supplied session arguments take precedence over these defaults
    all_session_args = dict(autoflush=True, autocommit=False, bind=engine)
    all_session_args.update(session_args)
    sm = orm.sessionmaker(class_=MultilangSession,
        default_language_id=ENGLISH_ID, **all_session_args)
    session = MultilangScopedSession(sm)

    return session

def identifier_from_name(name):
    """Make a string safe to use as an identifier.

    Valid characters are lowercase alphanumerics and "-". This function may
    raise ValueError if it can't come up with a suitable identifier.

    This function is useful for scripts which add things with names.

    `name` may be text or a UTF-8 encoded byte string; the identifier is
    always returned as text.  ValueError carries the rejected identifier.
    """
    # Only byte strings need decoding.  Checking for `bytes` (an alias of
    # `str` on Python 2) keeps the old behaviour there while letting text
    # strings pass straight through on Python 3, where they have no .decode().
    if isinstance(name, bytes):
        identifier = name.decode('utf-8')
    else:
        identifier = name

    identifier = identifier.lower()
    identifier = identifier.replace(u'+', u' plus ')
    # Runs of spaces, underscores and en dashes collapse into a single hyphen
    identifier = re.sub(u'[ _–]+', u'-', identifier)
    # Punctuation is dropped outright
    identifier = re.sub(u"['./;’(),:]", u'', identifier)
    identifier = identifier.replace(u'é', u'e')
    identifier = identifier.replace(u'♀', u'-f')
    identifier = identifier.replace(u'♂', u'-m')

    # Names made up solely of punctuation get spelled-out identifiers
    if identifier in (u'???', u'????'):
        identifier = u'unknown'
    elif identifier == u'!':
        identifier = u'exclamation'
    elif identifier == u'?':
        identifier = u'question'

    # Anything other than alphanumerics and hyphens left at this point means
    # no safe identifier could be produced (this includes the empty string)
    if not identifier.replace(u"-", u"").isalnum():
        raise ValueError(identifier)
    return identifier
-												Add identifier_from_name() function.

Adapted from name2ident() in scripts/migration-i18n.py.

											
										
										
											2011-04-03 11:49:34 +00:00
+								# encoding: utf-8
 								import re
-												Remove the responsibility of setting a default language from multilang.

Caller now has to do it.  No need to avoid circular deps, no need to do
much of anything at all.

											
										
										
											2011-04-06 04:03:41 +00:00
+								from sqlalchemy import engine_from_config, orm
-												Initial commit, with much of the data imported.

Includes a wrapper script 'pokedex' that can, so far, read data from a
db and spit out CSVs or deploy CSVs to a db.

											
										
										
											2009-02-05 08:05:42 +00:00
-												Aggressive table renaming, in case dropping the vowels still doesn't bring the length under 30

											
										
										
											2013-12-18 23:36:43 +00:00
+								from hashlib import md5
-												Factor out logic for finding the default db/index.  #180

Note: `if not x:` has changed to `if x is not None:`, changing the
semantics slightly.  Shouldn't be a big issue.

											
										
										
											2010-05-13 17:33:07 +00:00
+								from ..defaults import get_default_db_uri
-												Remove the responsibility of setting a default language from multilang.

Caller now has to do it.  No need to avoid circular deps, no need to do
much of anything at all.

											
										
										
											2011-04-06 04:03:41 +00:00
+								from .tables import Language, metadata
-												Match default language by id, not identifier.

											
										
										
											2011-03-30 03:15:41 +00:00
+								from .multilang import MultilangSession, MultilangScopedSession
-												Initial commit, with much of the data imported.

Includes a wrapper script 'pokedex' that can, so far, read data from a
db and spit out CSVs or deploy CSVs to a db.

											
										
										
											2009-02-05 08:05:42 +00:00
-												Fix default language assignment once and for all.

Stop trying to be clever and magical and just make the caller pass in a
damn primary key.

											
										
										
											2011-04-10 07:54:14 +00:00
+								ENGLISH_ID = 9
-												Factor out logic for finding the default db/index.  #180

Note: `if not x:` has changed to `if x is not None:`, changing the
semantics slightly.  Shouldn't be a big issue.

											
										
										
											2010-05-13 17:33:07 +00:00
-												Use engine_from_config for database connections.  #390

											
										
										
											2011-01-31 06:29:23 +00:00
+								def connect(uri=None, session_args={}, engine_args={}, engine_prefix=''):
-												Initial commit, with much of the data imported.

Includes a wrapper script 'pokedex' that can, so far, read data from a
db and spit out CSVs or deploy CSVs to a db.

											
										
										
											2009-02-05 08:05:42 +00:00
+								    """Connects to the requested URI.  Returns a session object.
-												Vastly improved the pokedex import/export UI.

csvimport is now load; csvexport is now dump.

Both take an optional -e switch to specify an engine, but will happily
use a default SQLite database in the pokedex package directory.

Additionally, the CSV directory is now controlled by the optional -d
switch, and defaults to Doing The Right Thing.

So `pokedex load` now does exactly what you'd expect: loads the data
from the right files into a consistently-located database.

											
										
										
											2009-08-19 01:02:53 +00:00
+								    With the URI omitted, attempts to connect to a default SQLite database
 								    contained within the package directory.
-												Initial commit, with much of the data imported.

Includes a wrapper script 'pokedex' that can, so far, read data from a
db and spit out CSVs or deploy CSVs to a db.

											
										
										
											2009-02-05 08:05:42 +00:00
+								    Calling this function also binds the metadata object to the created engine.
 								    """
-												Factor out logic for finding the default db/index.  #180

Note: `if not x:` has changed to `if x is not None:`, changing the
semantics slightly.  Shouldn't be a big issue.

											
										
										
											2010-05-13 17:33:07 +00:00
+								    # If we didn't get a uri, fall back to the default
-												Use engine_from_config for database connections.  #390

											
										
										
											2011-01-31 06:29:23 +00:00
+								    if uri is None:
-												Fix connect() without arguments

											
										
										
											2011-03-12 14:46:04 +00:00
+								        uri = engine_args.get(engine_prefix + 'url', None)
-												Factor out logic for finding the default db/index.  #180

Note: `if not x:` has changed to `if x is not None:`, changing the
semantics slightly.  Shouldn't be a big issue.

											
										
										
											2010-05-13 17:33:07 +00:00
+								    if uri is None:
 								        uri = get_default_db_uri()
-												Vastly improved the pokedex import/export UI.

csvimport is now load; csvexport is now dump.

Both take an optional -e switch to specify an engine, but will happily
use a default SQLite database in the pokedex package directory.

Additionally, the CSV directory is now controlled by the optional -d
switch, and defaults to Doing The Right Thing.

So `pokedex load` now does exactly what you'd expect: loads the data
from the right files into a consistently-located database.

											
										
										
											2009-08-19 01:02:53 +00:00
-												Initial commit, with much of the data imported.

Includes a wrapper script 'pokedex' that can, so far, read data from a
db and spit out CSVs or deploy CSVs to a db.

											
										
										
											2009-02-05 08:05:42 +00:00
+								    ### Do some fixery for MySQL
-												Make load.py more idiomatic.

Also snuck an idiom into db/__init__.py.

											
										
										
											2011-04-03 11:26:45 +00:00
+								    if uri.startswith('mysql:'):
-												Initial commit, with much of the data imported.

Includes a wrapper script 'pokedex' that can, so far, read data from a
db and spit out CSVs or deploy CSVs to a db.

											
										
										
											2009-02-05 08:05:42 +00:00
+								        # MySQL uses latin1 for connections by default even if the server is
 								        # otherwise oozing with utf8; charset fixes this
 								        if 'charset' not in uri:
 								            uri += '?charset=utf8'
-												Fixed some MySQL import problems.

Tables weren't being defined as UTF-8 if that wasn't the server default.

A lot of tables were trying to create erroneous auto_increment columns.

Foreign key checks were pretty much fucking everything up.

											
										
										
											2009-03-08 02:54:01 +00:00
+								        # Tables should be InnoDB, in the event that we're creating them, and
 								        # use UTF-8 goddammit!
-												Initial commit, with much of the data imported.

Includes a wrapper script 'pokedex' that can, so far, read data from a
db and spit out CSVs or deploy CSVs to a db.

											
										
										
											2009-02-05 08:05:42 +00:00
+								        for table in metadata.tables.values():
 								            table.kwargs['mysql_engine'] = 'InnoDB'
-												Fixed some MySQL import problems.

Tables weren't being defined as UTF-8 if that wasn't the server default.

A lot of tables were trying to create erroneous auto_increment columns.

Foreign key checks were pretty much fucking everything up.

											
										
										
											2009-03-08 02:54:01 +00:00
+								            table.kwargs['mysql_charset'] = 'utf8'
-												Initial commit, with much of the data imported.

Includes a wrapper script 'pokedex' that can, so far, read data from a
db and spit out CSVs or deploy CSVs to a db.

											
										
										
											2009-02-05 08:05:42 +00:00
-												Support for Oracle: auto-shorten long table names, use UnicodeText() instead of Unicode(4000) because of size limitations for the mapped datatypes in Oracle

											
										
										
											2013-12-18 13:32:13 +00:00
+								    ### Do some fixery for Oracle
 								    if uri.startswith('oracle:') or uri.startswith('oracle+cx_oracle:'):
 								        # Oracle requires auto_setinputsizes=False (or at least a special
 								        # set of exclusions from it, which I don't know)
 								        if 'auto_setinputsizes' not in uri:
 								            uri += '?auto_setinputsizes=FALSE'
 								        # Shorten table names, Oracle limits table and column names to 30 chars
 								        # Easy solution : drop the vowels, differents words are unlikely to
 								        # end up the same after the vowels are gone
 								        for table in metadata.tables.values():
-												Removed use of table.description, add a _original_name to it instead

											
										
										
											2013-12-18 22:37:03 +00:00
+								            table._orginal_name = table.name[:]
-												Support for Oracle: auto-shorten long table names, use UnicodeText() instead of Unicode(4000) because of size limitations for the mapped datatypes in Oracle

											
										
										
											2013-12-18 13:32:13 +00:00
+								            if len(table.name) > 30:
 								                for letter in ['a', 'e', 'i', 'o', 'u', 'y']:
 								                    table.name=table.name.replace(letter,'')
-												Aggressive table renaming, in case dropping the vowels still doesn't bring the length under 30

											
										
										
											2013-12-18 23:36:43 +00:00
+								            # Aggressive renaming if the length is still too long:
 								            # Take the initials of the table, add a hash to make a new name
 								            if len(table.name) > 30:
 								                hashedname = md5(table._orginal_name).hexdigest()
 								                shortname = ''.join(word[:1] for word in table.name.split('_'))
 								                shortname = ''.join([shortname, hashedname])
 								                table.name = shortname[:30]
-												Support for Oracle: auto-shorten long table names, use UnicodeText() instead of Unicode(4000) because of size limitations for the mapped datatypes in Oracle

											
										
										
											2013-12-18 13:32:13 +00:00
-												Initial commit, with much of the data imported.

Includes a wrapper script 'pokedex' that can, so far, read data from a
db and spit out CSVs or deploy CSVs to a db.

											
										
										
											2009-02-05 08:05:42 +00:00
+								    ### Connect
-												Use engine_from_config for database connections.  #390

											
										
										
											2011-01-31 06:29:23 +00:00
+								    engine_args[engine_prefix + 'url'] = uri
 								    engine = engine_from_config(engine_args, prefix=engine_prefix)
-												Initial commit, with much of the data imported.

Includes a wrapper script 'pokedex' that can, so far, read data from a
db and spit out CSVs or deploy CSVs to a db.

											
										
										
											2009-02-05 08:05:42 +00:00
+								    conn = engine.connect()
 								    metadata.bind = engine
-												Allow passing engine arguments to connect().

											
										
										
											2010-03-17 07:44:19 +00:00
+								    all_session_args = dict(autoflush=True, autocommit=False, bind=engine)
 								    all_session_args.update(session_args)
-												Fix default language assignment once and for all.

Stop trying to be clever and magical and just make the caller pass in a
damn primary key.

											
										
										
											2011-04-10 07:54:14 +00:00
+								    sm = orm.sessionmaker(class_=MultilangSession,
 								        default_language_id=ENGLISH_ID, **all_session_args)
-												Match default language by id, not identifier.

											
										
										
											2011-03-30 03:15:41 +00:00
+								    session = MultilangScopedSession(sm)
-												Initial commit, with much of the data imported.

Includes a wrapper script 'pokedex' that can, so far, read data from a
db and spit out CSVs or deploy CSVs to a db.

											
										
										
											2009-02-05 08:05:42 +00:00
 								    return session
-												Add identifier_from_name() function.

Adapted from name2ident() in scripts/migration-i18n.py.

											
										
										
											2011-04-03 11:49:34 +00:00
 								def identifier_from_name(name):
 								    """Make a string safe to use as an identifier.
 								    Valid characters are lowercase alphanumerics and "-". This function may
 								    raise ValueError if it can't come up with a suitable identifier.
 								    This function is useful for scripts which add things with names.
 								    """
 								    if isinstance(name, str):
 								        identifier = name.decode('utf-8')
 								    else:
 								        identifier = name
 								    identifier = identifier.lower()
 								    identifier = identifier.replace(u'+', u' plus ')
 								    identifier = re.sub(u'[ _–]+', u'-', identifier)
 								    identifier = re.sub(u"['./;’(),:]", u'', identifier)
 								    identifier = identifier.replace(u'é', u'e')
 								    identifier = identifier.replace(u'♀', u'-f')
 								    identifier = identifier.replace(u'♂', u'-m')
 								    if identifier in (u'???', u'????'):
 								        identifier = u'unknown'
 								    elif identifier == u'!':
 								        identifier = u'exclamation'
 								    elif identifier == u'?':
 								        identifier = u'question'
 								    if not identifier.replace(u"-", u"").isalnum():
 								        raise ValueError(identifier)
 								    return identifier