WIP: Code optimisation
radheyakale committed Jun 29, 2022
2 parents fcc597a + 144a86a commit c479f08
Showing 53 changed files with 602 additions and 355 deletions.
5 changes: 5 additions & 0 deletions .bandit
@@ -0,0 +1,5 @@
[bandit]
; Only test the Gramex source folder, not tests or testlib
exclude = */tests/*,*/testlib/*,*/node_modules/*
; B101:assert_used - assertions are used in test cases and are harmless in code
skips = B101
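For reference, this configuration is meant to be picked up when Bandit scans the Gramex source tree. A minimal, hypothetical sketch of running such a scan from Python — it assumes Bandit is installed and that its `--ini` flag (which loads a `.bandit` INI file) is available in the installed version:

import subprocess  # nosec B404: used only to launch the linter locally

# Scan the gramex/ package with the .bandit settings above:
# tests/, testlib/ and node_modules/ are excluded, and B101 (assert_used) is skipped.
result = subprocess.run(
    ['bandit', '-r', 'gramex', '--ini', '.bandit'],
    capture_output=True, text=True,
)
print(result.stdout)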
14 changes: 1 addition & 13 deletions .editorconfig
@@ -7,23 +7,11 @@ root = true

# Apply common styles for most standard code files.
# Do not apply to * - that covers binary files as well
[*.{js,html,php,py,css,svg,json,less,yaml,yml,scss,xml,sh,java,bat,R}]
[*.{js,html,php,css,svg,json,less,yaml,yml,scss,xml,sh,java,bat,R}]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8
# Stick to 2-space indenting by default, to conserve space
indent_style = space
indent_size = 2

[*.py]
indent_size = 4

[Makefile]
indent_style = tab
indent_size = 4

[testlib/test_config/config.empty.yaml]
insert_final_newline = false
[tests/dir/gramex.yaml]
insert_final_newline = false
5 changes: 3 additions & 2 deletions gramex/apps/admin/controlpanel.py
@@ -117,10 +117,11 @@ def evaluate(handler, code):
# Run code and get the result. (Result is None for exec)
try:
context = contexts.setdefault(handler.session['id'], {})
# B307:eval B102:exec_used is safe since only admin can run this
if mode == 'eval':
result = eval(co, context) # nosec: only admin can run this
result = eval(co, context) # nosec B307
else:
exec(co, context) # nosec: only admin can run this
exec(co, context) # nosec B102
result = None
except Exception as e:
result = e
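For context, the dispatch above follows the standard compile-then-run pattern: expressions go through eval to get a value, statements go through exec. A self-contained, hypothetical sketch of that pattern (not part of this commit):

def run_snippet(code, context):
    '''Evaluate `code` as an expression if possible, else execute it as statements.'''
    try:
        co, mode = compile(code, '<input>', 'eval'), 'eval'
    except SyntaxError:
        co, mode = compile(code, '<input>', 'exec'), 'exec'
    if mode == 'eval':
        return eval(co, context)  # nosec B307: trusted (admin-only) input
    exec(co, context)  # nosec B102: trusted (admin-only) input
    return None

context = {}
print(run_snippet('x = 6 * 7', context))  # None: a statement has no value
print(run_snippet('x + 1', context))      # 43: an expression returns a value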
17 changes: 9 additions & 8 deletions gramex/apps/admin2/gramexadmin.py
@@ -149,10 +149,11 @@ def evaluate(handler, code):
try:
context = contexts.setdefault(handler.session['id'], {})
context['handler'] = handler
# B307:eval B102:exec_used is safe since only admin can run this
if mode == 'eval':
result = eval(co, context) # nosec: only admin can run this
result = eval(co, context) # nosec B307
else:
exec(co, context) # nosec: only admin can run this
exec(co, context) # nosec B102
result = None
except Exception as e:
result = e
@@ -200,12 +201,12 @@ def system_information(handler):

from gramex.cache import Subprocess
apps = {
# shell=True is safe here since the code is constructed entirely in this function
# We use shell to pick up the commands' paths from the shell.
('node', 'version'): Subprocess('node --version', shell=True), # nosec
('npm', 'version'): Subprocess('npm --version', shell=True), # nosec
('yarn', 'version'): Subprocess('yarn --version', shell=True), # nosec
('git', 'version'): Subprocess('git --version', shell=True), # nosec
# B602:any_other_function_with_shell_equals_true is safe here since the code is
# constructed entirely in this function. We use shell to pick up the commands' paths.
('node', 'version'): Subprocess('node --version', shell=True), # nosec B602
('npm', 'version'): Subprocess('npm --version', shell=True), # nosec B602
('yarn', 'version'): Subprocess('yarn --version', shell=True), # nosec B602
('git', 'version'): Subprocess('git --version', shell=True), # nosec B602
}
for key, proc in apps.items():
stdout, stderr = yield proc.wait_for_exit()
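As an illustration (hypothetical, not part of this commit), the same version-probing pattern can be packaged as a coroutine. It assumes, as the code above does, that gramex.cache.Subprocess accepts a shell command string and that wait_for_exit() yields a (stdout, stderr) pair of bytes:

from tornado.gen import coroutine, Return
from gramex.cache import Subprocess


@coroutine
def tool_versions():
    # The commands are hard-coded here too, so shell=True remains safe.
    procs = {
        name: Subprocess(name + ' --version', shell=True)  # nosec B602
        for name in ('node', 'npm', 'git')
    }
    versions = {}
    for name, proc in procs.items():
        stdout, stderr = yield proc.wait_for_exit()
        versions[name] = (stdout or stderr).decode('utf-8', 'ignore').strip()
    raise Return(versions)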
6 changes: 6 additions & 0 deletions gramex/apps/logviewer/gramex.yaml
@@ -1,4 +1,5 @@
# Configurable variables
# LOGVIEWER_DB
# LOGVIEWER_PATH_UI
# LOGVIEWER_PATH_RENDER
# $LOGVIEWER_FORMHANDLER_KWARGS
@@ -9,6 +10,9 @@
# $LOGVIEWER_SCHEDULER_KWARGS

variables:
LOGVIEWER_DB:
default:
url: sqlite:///$GRAMEXDATA/logs/logviewer.db
LOGVIEWER_SCHEDULER_PORT:
default: ''
LOGVIEWER_PATH_UI:
@@ -149,6 +153,7 @@ schedule:
apps/logviewer-$* if '--listen.port=' + LOGVIEWER_SCHEDULER_PORT in ''.join(sys.argv[1:]) or not LOGVIEWER_SCHEDULER_PORT:
function: logviewer.summarize
kwargs:
db: $LOGVIEWER_DB
custom_dims:
import.merge: $LOGVIEWER_CUSTOM_DIMENSIONS
session_threshold: 15
@@ -187,6 +192,7 @@ schedule:
op: NOTCONTAINS
value: '\.js|\.css|\.ico|\.png|\.jpg|\.jpeg|\.gif|\.otf|\.woff.*|\.eot'
as: uri_1
# TODO: this may not work as logviewer.summarize() does not accept arbitrary kwargs!
import.merge: $LOGVIEWER_SCHEDULER_KWARGS
startup: true
# Run at 6pm local time. In India, this is a bit after 0:00 UTC,
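The new LOGVIEWER_DB variable resolves to a plain mapping of gramex.data keyword arguments — by default just a SQLAlchemy url pointing at logviewer.db under $GRAMEXDATA — and the scheduler passes it to logviewer.summarize as db. A hypothetical sketch of how downstream code consumes that mapping (the path is illustrative):

import gramex.data

# What LOGVIEWER_DB resolves to (illustrative path):
db = {'url': 'sqlite:///logs/logviewer.db'}

# gramex.data calls simply splat the mapping, e.g. reading the daily aggregate table
# ('aggD' per DB_CONFIG in logviewer.py) as a DataFrame:
daily = gramex.data.filter(**db, table='aggD', args={})
print(daily.head())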
141 changes: 64 additions & 77 deletions gramex/apps/logviewer/logviewer.py
@@ -1,34 +1,37 @@
import re
import sys
import os.path
import sqlite3
from glob import glob
# lxml.etree is safe on https://github.com/tiran/defusedxml/tree/main/xmltestdata
from lxml.etree import Element # nosec: lxml is fixed
from lxml.html import fromstring, tostring # nosec: lxml is fixed
# B410:import_lxml lxml.etree is safe on https://github.com/tiran/defusedxml/tree/main/xmltestdata
from lxml.etree import Element # nosec B410
from lxml.html import fromstring, tostring # nosec B410
import numpy as np
import pandas as pd
import gramex.data
import gramex.cache
from gramex import conf
from gramex.config import app_log
from gramex.transforms import build_transform
from typing import List

if sys.version_info.major == 3:
unicode = str

DB_CONFIG = {
'table': 'agg{}',
'levels': ['M', 'W', 'D'],
'dimensions': [{'key': 'time', 'freq': '?level'},
'user.id', 'ip', 'status', 'uri'],
'dimensions': [
{'key': 'time', 'freq': '?level'},
'user.id', 'ip', 'status', 'uri'
],
'metrics': {
'duration': ['count', 'sum'],
'new_session': ['sum'],
'session_time': ['sum']
}
}

# TODO: extra_columns should not be a global. One instance may use multiple logviewers!
extra_columns = []
for key in conf.get('schedule', []):
if 'kwargs' in conf.schedule[key] and 'custom_dims' in conf.schedule[key].kwargs:
@@ -61,12 +64,6 @@ def pdagg(df, groups, aggfuncs):
return dff.reset_index()


def table_exists(table, conn):
'''check if table exists in sqlite db'''
query = "SELECT name FROM sqlite_master WHERE type='table' AND name=?"
return not pd.read_sql(query, conn, params=[table]).empty


def add_session(df, duration=30, cutoff_buffer=0):
'''add new_session based on `duration` threshold
add cutoff_buffer in minutes for first and last session requests
@@ -103,21 +100,28 @@ def prepare_logs(df, session_threshold=15, cutoff_buffer=0, custom_dims={}):
return df


def create_column_if_not_exists(table, freq, conn):
for col in extra_columns:
for row in conn.execute(f'PRAGMA table_info({table(freq)})'):
if row[1] == col:
break
else:
query = f'ALTER TABLE {table(freq)} ADD COLUMN "{col}" TEXT DEFAULT ""'
conn.execute(query)
conn.commit()


def summarize(transforms=[], post_transforms=[], run=True,
session_threshold=15, cutoff_buffer=0, custom_dims=None):
'''summarize'''
app_log.info('logviewer: Summarize started')
def summarize(
db: dict,
transforms: List[dict] = [],
post_transforms: List[dict] = [],
session_threshold: float = 15,
cutoff_buffer: float = 0,
custom_dims: dict = None) -> None:
'''Summarize log files into a database, periodically.
Parameters:
db: SQLAlchemy database configuration passed to gramex.data (e.g. a `url` key).
transforms: list of transforms to apply to the raw log data.
post_transforms: list of transforms to apply to the aggregated data.
session_threshold: idle time (in minutes) beyond which a new session starts.
cutoff_buffer: buffer (in minutes) applied to the first and last requests of a session.
custom_dims: custom columns to add to the logviewer tables.
This function is called by a scheduler and/or on Gramex startup.
It aggregates and updates logs from the requests.csv file(s), processing only entries
newer than the timestamp of the last stored logs, and creates the aggregation tables
if they don't exist.
'''
app_log.info('logviewer.summarize started')
levels = DB_CONFIG['levels']
table = DB_CONFIG['table'].format
# dimensions and metrics to summarize
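As a usage illustration (hypothetical, not part of this commit), the scheduler entry in gramex.yaml effectively calls the new signature like this, with db carrying the url from LOGVIEWER_DB:

# Assumes the logviewer module is importable (Gramex resolves it from the app folder).
import logviewer

logviewer.summarize(
    db={'url': 'sqlite:///logs/logviewer.db'},  # illustrative url
    transforms=[],
    post_transforms=[],
    session_threshold=15,
    cutoff_buffer=0,
    custom_dims=None,
)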
@@ -126,70 +130,52 @@ def summarize(transforms=[], post_transforms=[], run=True,
log_file = conf.log.handlers.requests.filename
# Handle for multiple instances requests.csv$LISTENPORT
log_file = '{0}{1}'.format(*log_file.partition('.csv'))
folder = os.path.dirname(log_file)
conn = sqlite3.connect(os.path.join(folder, 'logviewer.db'))

for freq in levels:
try:
create_column_if_not_exists(table, freq, conn)
except sqlite3.OperationalError:
# Inform when table is created for the first time
app_log.info('logviewer: OperationalError: Table does not exist')

# drop agg tables from database
if run in ['drop', 'reload']:
droptable = 'DROP TABLE IF EXISTS {}'.format
for freq in levels:
app_log.info('logviewer: Dropping {} table'.format(table(freq)))
conn.execute(droptable(table(freq)))
conn.commit()
conn.execute('VACUUM')
if run == 'drop':
conn.close()
return
# all log files sorted by modified time
log_files = sorted(glob(log_file + '*'), key=os.path.getmtime)
max_date = None

def filesince(filename, date):
match = re.search(r'(\d{4}-\d{2}-\d{2})$', filename)
backupdate = match.group() if match else ''
return backupdate >= date or backupdate == ''

# get this month log files if db is already created
if table_exists(table(levels[-1]), conn):
query = 'SELECT MAX(time) FROM {}'.format(table(levels[-1])) # nosec: table() is safe
max_date = pd.read_sql(query, conn).iloc[0, 0]
app_log.info(f'logviewer: last processed till {max_date}')
this_month = max_date[:8] + '01'
log_files = [f for f in log_files if filesince(f, this_month)]
# get most recent log files if db is already created
try:
log_filter = gramex.data.filter(**db, table=table(levels[-1]), args={})
max_date = log_filter.sort_values('time', ascending=False)['time'].iloc[0]
max_date = pd.to_datetime(max_date)
except Exception: # noqa
max_date = None
else:
app_log.info(f'logviewer.summarize: processing since {max_date}')
this_month = max_date.strftime('%Y-%m-01')
log_files = [f for f in log_files if filesince(f, this_month)]

if not log_files:
app_log.info('logviewer: no log files to process')
app_log.info('logviewer.summarize: no log files to process')
return
# Create dataframe from log files
columns = conf.log.handlers.requests['keys']
# TODO: avoid concat?
app_log.info(f'logviewer: files to process {log_files}')
app_log.info(f'logviewer.summarize: processing {log_files}')
data = pd.concat([
pd.read_csv(f, names=columns, encoding='utf-8').fillna('-')
for f in log_files
], ignore_index=True)
app_log.info(
'logviewer: prepare_logs {} rows with {} mint session_threshold'.format(
'logviewer.summarize: prepare_logs {} rows with session_threshold={}'.format(
len(data.index), session_threshold))
data = prepare_logs(df=data,
session_threshold=session_threshold,
cutoff_buffer=cutoff_buffer,
custom_dims=custom_dims)
app_log.info('logviewer: processed and returned {} rows'.format(len(data.index)))
app_log.info('logviewer.summarize: processed {} rows'.format(len(data.index)))
# apply transforms on raw data
app_log.info('logviewer: applying transforms')
app_log.info('logviewer.summarize: applying transforms')
for spec in transforms:
apply_transform(data, spec) # applies on copy
# levels should go from M > W > D
for freq in levels:
app_log.info('logviewer.summarize: aggregating {}'.format(table(freq)))
# filter dataframe for max_date.level
if max_date:
date_from = max_date
@@ -199,29 +185,30 @@ def filesince(filename, date):
date_from -= pd.offsets.MonthBegin(1)
data = data[data.time.ge(date_from)]
# delete old records
query = f'DELETE FROM {table(freq)} WHERE time >= ?' # nosec: table() is safe
conn.execute(query, (f'{date_from}',))
conn.commit()
gramex.data.delete(**db, table=table(freq), args={'time>~': [date_from]}, id=['time'])
groups[0]['freq'] = freq
# get summary view
app_log.info('logviewer: pdagg for {}'.format(table(freq)))
dff = pdagg(data, groups, aggfuncs)
# apply post_transforms here
app_log.info('logviewer: applying post_transforms')
for spec in post_transforms:
apply_transform(dff, spec)
# insert new records
try:
dff.to_sql(table(freq), conn, if_exists='append', index=False)
# dff columns should match with table columns
# if not, call summarize run='reload' to
# drop all the tables and rerun the job
except sqlite3.OperationalError:
app_log.info('logviewer: OperationalError: run: reload')
summarize(transforms=transforms, run='reload')
return
conn.close()
app_log.info('logviewer: Summarize completed')
cols = {}
for col in dff.columns:
dt = dff[col].dtype.type
if pd.api.types.is_datetime64_any_dtype(dt):
cols[col] = 'DATETIME'
elif pd.api.types.is_bool_dtype(dt):
cols[col] = 'BOOLEAN'
elif pd.api.types.is_integer_dtype(dt):
cols[col] = 'INTEGER'
elif pd.api.types.is_numeric_dtype(dt):
cols[col] = 'REAL'
else:
cols[col] = 'TEXT'
gramex.data.alter(**db, table=table(freq), columns=cols)
gramex.data.insert(**db, table=table(freq), args=dff.to_dict())
app_log.info('logviewer.summarize: completed')
return


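To make the dtype-to-column-type mapping in summarize() concrete, a small self-contained example; the column names are illustrative and the checks mirror the ones added in this commit, applied to the column dtype:

import pandas as pd

dff = pd.DataFrame({
    'time': pd.to_datetime(['2022-06-01', '2022-06-02']),
    'user.id': ['alice', 'bob'],
    'duration_count': [10, 12],
    'duration_sum': [1.5, 2.25],
})

cols = {}
for col in dff.columns:
    dt = dff[col].dtype
    if pd.api.types.is_datetime64_any_dtype(dt):
        cols[col] = 'DATETIME'
    elif pd.api.types.is_bool_dtype(dt):
        cols[col] = 'BOOLEAN'
    elif pd.api.types.is_integer_dtype(dt):
        cols[col] = 'INTEGER'
    elif pd.api.types.is_numeric_dtype(dt):
        cols[col] = 'REAL'
    else:
        cols[col] = 'TEXT'

print(cols)
# {'time': 'DATETIME', 'user.id': 'TEXT', 'duration_count': 'INTEGER', 'duration_sum': 'REAL'}
# gramex.data.alter() can then add any missing columns before gramex.data.insert().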
6 changes: 4 additions & 2 deletions gramex/apps/ui/__init__.py
@@ -6,7 +6,8 @@
import gramex
import gramex.cache
import string
import subprocess # nosec: only for JS compilation
# B404:import_subprocess only for JS compilation
import subprocess # nosec B404
from hashlib import md5
from tornado.gen import coroutine, Return
from functools import partial
@@ -29,7 +30,8 @@ def join(*args):

def get_cache_key(state):
cache_key = json.dumps(state, sort_keys=True, ensure_ascii=True).encode('utf-8')
return md5(cache_key).hexdigest()[:5] # nosec: non-cryptographic use
# B303:md5 is safe here - it's not for cryptographic use
return md5(cache_key).hexdigest()[:5] # nosec B303


@coroutine
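A small standalone illustration (not part of the commit) of why sort_keys matters for this cache key: equal states serialize to identical JSON, so they hash to the same 5-character key regardless of dict ordering:

import json
from hashlib import md5


def get_cache_key(state):
    cache_key = json.dumps(state, sort_keys=True, ensure_ascii=True).encode('utf-8')
    # B303: md5 is fine here; the digest is only a short cache key, not a credential.
    return md5(cache_key).hexdigest()[:5]  # nosec B303


assert get_cache_key({'a': 1, 'b': 2}) == get_cache_key({'b': 2, 'a': 1})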
4 changes: 2 additions & 2 deletions gramex/apps/ui/setup.js
@@ -41,14 +41,14 @@ fs.readdirSync(themes_guide_root).forEach(function (dir) {
fs.readFileSync(theme_file, 'utf8')
.replace('@import "bootstrap";', '@import "gramexui";')
// Themes Guide disables grid classes. But we want to use them, so kill this line
.replace('$enable-grid-classes:false;\n', ''))
.replace('$enable-grid-classes:false;\n', '') + '\n')
themes.push(`themes-guide/${dir}`)
}
})
execSync('rm -rf bootstrap-themes', { cwd: tmp })

// Save list of themes
fs.writeFileSync('theme/themes.json', JSON.stringify({ 'themes': themes }))
fs.writeFileSync('theme/themes.json', JSON.stringify({ 'themes': themes }) + '\n')


// Utility functions
2 changes: 1 addition & 1 deletion gramex/apps/ui/theme/themes-guide/blue_voltage.scss
@@ -24,4 +24,4 @@ $btn-border-radius-lg:1.6rem;
$btn-border-radius-sm:.8rem;
@import "gramexui";

// Add SASS theme customizations here..
// Add SASS theme customizations here..
2 changes: 1 addition & 1 deletion gramex/apps/ui/theme/themes-guide/boldstrap.scss
@@ -18,4 +18,4 @@ $dark:#3c4055;
$body-bg:#efefef;
@import "gramexui";

// Add SASS theme customizations here..
// Add SASS theme customizations here..
