# -------------------------------------------------------------------
# - NAME: cleanup.py
# - AUTHOR: Reto Stauffer (IMGI@prognose2)
# - DATE: 2015-08-01
# -------------------------------------------------------------------
# - DESCRIPTION: A class handling the cleanup process.
# -------------------------------------------------------------------
# - EDITORIAL: 2015-08-01, RS: Created file on thinkreto.
# -------------------------------------------------------------------
# - L@ST MODIFIED: 2018-01-21 17:52 on marvin
# -------------------------------------------------------------------
# - New class
class cleanup(object):
    """Clean up files and database tables used for processing incoming
    observations.

    Args:
        config: Configuration object. It is read by key inside this class
            (e.g. ``config['cleanup_srctable']``) and is passed unchanged
            to the ``database`` class to open the connection.
    """

    # - Initialize the object
    def __init__(self, config):
        print(" Cleanup class initialized")
        self.config = config
        # Imported here (not at module level) exactly as before, so merely
        # importing this module does not require the database module.
        from database import database
        print(" Open database connection")
        self.db = database(config)

    # ----------------------------------------------------------------
    # - Helper: start-of-day cutoff timestamp
    # ----------------------------------------------------------------
    @staticmethod
    def _cutoff_timestamp(days):
        """Return the unix timestamp 'days' days before the start of the
        current day (epoch time floored to a multiple of 86400 seconds).

        Args:
            days (:obj:`int`): Number of days to keep.

        Returns:
            float: Cutoff timestamp; everything older counts as "old".
        """
        import time
        return (time.time() // 86400) * 86400 - days * 86400

    # ----------------------------------------------------------------
    # - Helper: parse the configured file endings
    # ----------------------------------------------------------------
    @staticmethod
    def _parse_endings(postfix):
        """Turn the configured file endings into a set of lower-case
        extensions without leading dot.

        Args:
            postfix: Either one string (several endings may be separated by
                commas, semicolons or whitespace) or an iterable of strings.

        Returns:
            set: Lower-case endings, e.g. ``set(['bufr', 'synop'])``.
        """
        import re
        if hasattr(postfix, "lower"):
            tokens = re.split(r"[\s,;]+", postfix)
        else:
            tokens = list(postfix)
        cleaned = (tok.strip().lstrip("*.").lower() for tok in tokens)
        return set(tok for tok in cleaned if tok)

    # ----------------------------------------------------------------
    # - Delete old files on disc
    # ----------------------------------------------------------------
    def delete_old_raw_files(self):
        """
        Delete old files from disc in the directories 'essential_outdir'
        and 'additional_outdir' as defined in the config. We do NOT decide
        between synop/bufr or processed/error here. Just kill them if they
        are older than 'cleanup_file_days' and carry one of the endings in
        'cleanup_file_endings'.

        Both directories are always processed: a directory without old
        files is skipped, it no longer ends the whole method.

        Returns:
            bool: Always True.
        """
        import os
        from datetime import datetime as dt

        days = self.config['cleanup_file_days']
        postfix = self.config['cleanup_file_endings']
        maxage = self._cutoff_timestamp(days)

        for outdir in [self.config['essential_outdir'],
                       self.config['additional_outdir']]:
            print("")
            print(" - Searching for old files in \"%s\" older than about %d days" % (outdir, days))
            print(" Or exactly: files older than %s" % dt.fromtimestamp(maxage))
            files = self.getOldFiles(outdir, maxage, postfix)

            # - No old files here? Go on with the next directory.
            if not files:
                print(" No old files found. Skip this directory. Done.")
                continue

            # - Else delete these files
            print(" Found %d old files on disc" % len(files))
            for path in files:
                os.remove(path)
            print(" Old files removed from %s. Done." % outdir)

        return True

    # ----------------------------------------------------------------
    # - Helper function loading old files
    # ----------------------------------------------------------------
    def getOldFiles(self, dirPath, maxage, postfix):
        """
        List old files on disc.

        Only files located inside sub-directories of dirPath (at any
        depth) are considered; files placed directly in dirPath are not
        touched. Hidden files (leading dot) are not matched by the glob.

        Args:
            dirPath (:obj:`str`): Path to the directory which should be
                checked. May be absolute or relative.
            maxage (:obj:`int`): Timestamp, files with a modification time
                older than this will be considered to be old and marked
                for deletion.
            postfix (:obj:`str`): File postfix. Only files whose ending
                matches exactly (not case sensitive) will be considered.
                Several endings can be given, see ``_parse_endings``.

        Returns:
            list: Paths of all matching files older than maxage.
        """
        import glob
        import os

        endings = self._parse_endings(postfix)
        oldfiles = []
        for root, dirs, _unused in os.walk(dirPath, topdown=False):
            for name in dirs:
                # glob already returns the path including the directory
                # part, so it must not be joined with root/name again.
                for path in glob.glob(os.path.join(root, name, "*")):
                    # Directories are not files to remove.
                    if not os.path.isfile(path):
                        continue
                    # Exact, case-insensitive comparison of the ending.
                    ending = os.path.splitext(path)[1][1:].lower()
                    if ending not in endings:
                        continue
                    if os.path.getmtime(path) < maxage:
                        oldfiles.append(path)
        return oldfiles

    # ----------------------------------------------------------------
    # - Cleaning up the database
    # ----------------------------------------------------------------
    def live_database_to_archive(self):
        """
        I would like to store some observation data longer than just
        a few days - however - we won't create a copy of the WMO
        observation data archive or something. Therefore we are just
        archiving some stations as defined in 'cleanup_stations' in
        the config. Rows of these stations are copied from the table
        'cleanup_srctable' into 'cleanup_dsttable' using REPLACE.

        The destination table is created (LIKE the source table) if it
        is missing; columns missing in an existing destination table
        are added first.

        Returns:
            True if archiving is not configured (tables or stations not
            set), False if the source table does not exist, None after
            the data have been copied.
        """
        print("")
        print(" - Migrate \"live\" database to \"archive\" database")
        srctable = self.config['cleanup_srctable']
        dsttable = self.config['cleanup_dsttable']
        stations = self.config['cleanup_stations']

        # - If one of both is None: skip
        if not srctable or not dsttable:
            print(" In config.conf: srctable or dsttable in [cleanup]")
            print(" not set. Archive of data not wished. Return.")
            return True
        print(" From database: %s" % srctable)
        print(" To database: %s" % dsttable)

        # - No stations (empty or not set at all)
        if not stations:
            print(" But no stations defined in the config.conf file")
            print(" in [cleanup]. Seems that you dont want any")
            print(" observation data in the archive table. Return.")
            return True
        print(" Have to backup: %s stations" % len(stations))

        # - Source table does not exist?
        if not self.db.__does_table_exist__(srctable):
            print("[!] Source table %s does not exist! RETURN!\n" % srctable)
            return False

        # - Check if table exists.
        if not self.db.__does_table_exist__(dsttable):
            print("[!] Table does not exist, we have to create it first")
            sql = "CREATE TABLE %s LIKE %s" % (dsttable, srctable)
            cur = self.db.cursor()
            cur.execute(sql)
            self.db.commit()
        else:
            print(" Table existing, migrate data ...")
            # - Checking columns in both tables. All columns in 'srctable'
            #   have to exist in the 'dsttable'. Else altering the dsttable.
            cur = self.db.cursor()
            # - Loading src columns (first field: name, second: type)
            cur.execute("SHOW COLUMNS FROM %s" % srctable)
            coldef = cur.fetchall()
            # - Loading dst columns
            cur.execute("SHOW COLUMNS FROM %s" % dsttable)
            dstcols = set(rec[0] for rec in cur.fetchall())
            # - Add every source column missing in the destination
            for rec in coldef:
                if rec[0] in dstcols:
                    continue
                print("[!] Column \"%s\" does not exist in table %s: ALTER" % (rec[0], dsttable))
                sql = 'ALTER TABLE %s ADD %s %s;' % (dsttable, rec[0], rec[1])
                cur.execute(sql)

        # -------------------------------------------------------------
        # - Now migrating the data
        # -------------------------------------------------------------
        # int() makes this work for station numbers given as strings too
        # and guarantees that only digits end up in the statement.
        statnr = ",".join("%d" % int(stn) for stn in stations)
        sql = "SELECT * FROM %s WHERE statnr in (%s)" % (srctable, statnr)
        cur = self.db.cursor()
        cur.execute(sql)
        desc = cur.description
        data = cur.fetchall()
        print(" %d rows to copy from %s -> %s" % (len(data), srctable, dsttable))

        cols = [rec[0] for rec in desc]
        sql = "REPLACE %s (" % dsttable + ",".join(cols) + ") VALUES (" \
            + ",".join(["%s"] * len(cols)) + ")"
        # Nothing to write if the selection is empty.
        if data:
            cur.executemany(sql, data)
        self.db.commit()
        print(" Data copied to %s table. Done." % dsttable)

    # ----------------------------------------------------------------
    # - Remove old observations from live table.
    # ----------------------------------------------------------------
    def cleanup_live_table(self):
        """
        We have a live and an archive table. These two tables are
        defined in the config. Here we are deleting all observations
        from the live table ('cleanup_srctable') whose 'datumsec' is
        older than about 'cleanup_db_days' days.

        Returns:
            False if the source table does not exist, None after the
            delete statement has been committed.
        """
        from datetime import datetime as dt

        days = self.config['cleanup_db_days']
        maxage = self._cutoff_timestamp(days)
        srctable = self.config['cleanup_srctable']
        print("")
        print(" - Delete old observations from %s table" % srctable)

        # - Source table does not exist?
        if not self.db.__does_table_exist__(srctable):
            print("[!] Source table %s does not exist! RETURN!\n" % srctable)
            return False
        print(" Delete observations older than %s" % dt.fromtimestamp(maxage))

        # - SQL - delete
        sql = "DELETE FROM %s WHERE datumsec < %d" % (srctable, maxage)
        cur = self.db.cursor()
        cur.execute(sql)
        self.db.commit()
        print(" Old observations deleted. Done.")

    # ----------------------------------------------------------------
    # - Close database
    # ----------------------------------------------------------------
    def closeDB(self):
        """
        Closing database.
        """
        self.db.close()