#!/usr/bin/env python
from Common import *
from UserDict import UserDict
from Object import Object
from Servlet import Servlet
from ServletFactory import *
from UnknownFileTypeServlet import UnknownFileTypeServletFactory
from types import FloatType
from glob import glob
import imp
import string
from threading import Lock, Thread, Event
from time import *
from fnmatch import fnmatch
from WebKit.Cookie import Cookie
from WebUtils.HTMLForException import HTMLForException
from ExceptionHandler import ExceptionHandler
from ConfigurableForServerSidePath import ConfigurableForServerSidePath
from TaskKit.Scheduler import Scheduler
from ASStreamOut import ASStreamOut
# Module-wide switch: set to a true value to enable the diagnostic print
# statements that are guarded by "if debug:" throughout this module.
debug = 0
class ApplicationError(Exception):
    """
    Raised by the Application for application-level problems, for example
    when no servlet factory can be found for a file extension.
    """
class EndResponse(Exception):
    """
    Raise this to leave the awake()/respond()/sleep() cycle early without
    a traceback being reported.

    If it is caught during respond(), sleep() is still called and the
    response is sent. If it is caught during awake(), both respond() and
    sleep() are skipped and the response is sent.
    """
class Application(ConfigurableForServerSidePath, Object):
    """
    The Application handles requests on behalf of the app server: for each
    request it creates the transaction and response, locates (and caches)
    the servlet for the requested server-side path and drives the
    awake()/respond()/sleep() cycle. It also keeps the configured contexts,
    the session store and the registered servlet factories.

    FUTURE
        * 2000-04-09 ce: Automatically open in browser.
        * 2000-04-09 ce: Option to remove HTML comments in responses.
        * 2000-04-09 ce: Option remove unnecessary white space in responses.
        * 2000-04-09 ce: Debugging flag and debug print method.
        * 2000-04-09 ce: A web-based, interactive monitor to the application.
        * 2000-04-09 ce: Record and playback of requests and responses. Useful for regression testing.
        * 2000-04-09 ce: sessionTimeout() and a hook for when the session has timed out.
        * 2000-04-09 ce: pageRefreshOnBacktrack
        * 2000-04-09 ce: terminate() and isTerminating()
        * 2000-04-09 ce: isRefusingNewSessions()
        * 2000-04-09 ce: terminateAfterTimeInterval()
        * 2000-04-09 ce: restoreSessionWithID:inTransaction:
        * 2000-04-09 ce: pageWithNameForRequest/Transaction() (?)
        * 2000-04-09 ce: port() and setPort() (?)
        * 2000-04-09 ce: Does request handling need to be embodied in a separate object?
            - Probably, as we may want request handlers for various file types.
        * 2000-04-09 ce: Concurrent request handling (probably through multi-threading)
    """
## Init ##
def __init__(self, server=None, transactionClass=None, sessionClass=None, requestClass=None, responseClass=None, exceptionHandlerClass=None, contexts=None, useSessionSweeper=1):
self._server = server
self._serverSidePath = server.serverSidePath()
ConfigurableForServerSidePath.__init__(self)
Object.__init__(self)
if self.setting('PrintConfigAtStartUp'):
self.printConfig()
self.initVersions()
if transactionClass:
self._transactionClass = transactionClass
else:
from Transaction import Transaction
self._transactionClass = Transaction
if sessionClass:
self._sessionClass = sessionClass
else:
from Session import Session
self._sessionClass = Session
if requestClass:
self._requestClass = requestClass
else:
from HTTPRequest import HTTPRequest
self._requestClass = HTTPRequest
if responseClass:
self._responseClass = responseClass
else:
from HTTPResponse import HTTPResponse
self._responseClass = HTTPResponse
if exceptionHandlerClass:
self._exceptionHandlerClass = exceptionHandlerClass
else:
self._exceptionHandlerClass = None
# Init other attributes
self._servletCacheByPath = {}
self._serverSideInfoCacheByPath = {}
self._cacheDictLock = Lock()
self._instanceCacheSize = self._server.setting('MaxServerThreads')
self._shutDownHandlers = []
# Set up servlet factories
self._factoryList = [] # the list of factories
self._factoryByExt = {} # a dictionary that maps all known extensions to their factories, for quick look up
self.addServletFactory(PythonServletFactory(self))
self.addServletFactory(UnknownFileTypeServletFactory(self))
# ^ @@ 2000-05-03 ce: make this customizable at least through a method (that can be overridden) if not a config file (or both)
## TaskManager
if self._server.isPersistent():
self._taskManager = Scheduler(1)
self._taskManager.start()
## End TaskManager
## Contexts
if contexts: #Try to get this from the Config file
defctxt = contexts
else: #Get it from Configurable object, which gets it from defaults or the user config file
defctxt = self.setting('Contexts')
self._contexts={}
# First load all contexts except the default
contextDirToName = {}
for i in defctxt.keys():
if i != 'default':
if not os.path.isabs(defctxt[i]):
path = self.serverSidePath(defctxt[i])
else:
path = defctxt[i]
self.addContext(i, path)
contextDirToName[path] = i
# @@ gat: this code would be much cleaner if we had a separate DefaultContext config variable.
# load in the default context, if any
self._defaultContextName = None
if defctxt.has_key('default'):
if not os.path.isabs(defctxt['default']):
path = self.serverSidePath(defctxt['default'])
else:
path = defctxt['default']
# see if the default context is the same as one of the other contexts
self._defaultContextName = contextDirToName.get(path, None)
if self._defaultContextName:
# the default context is shared with another context
self._setContext('default', self.context(self._defaultContextName))
else:
# the default context is separate from the other contexts, so add it like any other context
self._defaultContextName = 'default'
self.addContext('default', path)
print
## End Contexts
## Session store
# Create the session store
from SessionMemoryStore import SessionMemoryStore
from SessionFileStore import SessionFileStore
from SessionDynamicStore import SessionDynamicStore
klass = locals()['Session'+self.setting('SessionStore','File')+'Store']
assert type(klass) is ClassType
self._sessions = klass(self)
## End Session store
print 'Current directory:', os.getcwd()
self.running = 1
if useSessionSweeper:
self.startSessionSweeper()
self._cacheServletInstances = self.setting("CacheServletInstances",1)
print
try:
# First try the working dir
self._404Page = open(os.path.join(self._serverSidePath,"404Text.txt"),"r").read()
except:
try:
# Then try the directory this file is located in
self._404Page = open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "404Text.txt"),"r").read()
except:
# Fall back on a simple string
self._404Page = """404 Error<p>File Not Found: %s"""
# @@ ISB 09/02: make this default or get rid of it eventually
if self.setting('ExtraPathInfo', 0):
self.serverSideInfoForRequestOld = self.serverSideInfoForRequest
self.serverSideInfoForRequest = self.serverSideInfoForRequestNewAlgorithm
self._serverSideInfoCacheByPathNew = {}
self._filesToHideRegexes = []
self._filesToServeRegexes = []
from fnmatch import translate as fnTranslate
import re
for pattern in self.setting('FilesToHide'):
self._filesToHideRegexes.append(
re.compile(fnTranslate(pattern)))
for pattern in self.setting('FilesToServe'):
self._filesToServeRegexes.append(
re.compile(fnTranslate(pattern)))
def initVersions(self):
    """
    Read the 'version' and 'versionString' properties from the
    Properties.py files found in the Webware and WebKit directories and
    keep them in attributes of the application.
    """
    from MiscUtils.PropertiesObject import PropertiesObject
    # Webware itself
    webwareProps = PropertiesObject(os.path.join(self.webwarePath(), 'Properties.py'))
    self._webwareVersion = webwareProps['version']
    self._webwareVersionString = webwareProps['versionString']
    # The WebKit component
    webKitProps = PropertiesObject(os.path.join(self.webKitPath(), 'Properties.py'))
    self._webKitVersion = webKitProps['version']
    self._webKitVersionString = webKitProps['versionString']
## Task access
def taskManager(self):
    """
    Return the task manager. The attribute is only created by __init__()
    when the server is persistent.
    """
    return self._taskManager
## Session sweep task
def startSessionSweeper(self):
from Tasks import SessionTask
import time
task = SessionTask.SessionTask(self._sessions)
tm = self.taskManager()
sweepinterval = self.setting('SessionTimeout')*60/10
tm.addPeriodicAction(time.time()+sweepinterval, sweepinterval, task, "SessionSweeper")
print "Session Sweeper started"
## Shutdown
def shutDown(self):
"""
Called by AppServer when it is shuting down. The __del__ function of Application probably won't be called due to circular references.
"""
print "Application is Shutting Down"
self.running = 0
if hasattr(self, '_sessSweepThread'):
# We don't always have this, hence the 'if' above
self._closeEvent.set()
self._sessSweepThread.join()
del self._sessSweepThread
self._sessions.storeAllSessions()
if self._server.isPersistent():
self.taskManager().stop()
del self._sessions
del self._factoryByExt
del self._factoryList
del self._server
del self._servletCacheByPath
# Call all registered shutdown handlers
for shutDownHandler in self._shutDownHandlers:
shutDownHandler()
del self._shutDownHandlers
print "Application has been succesfully shutdown."
def addShutDownHandler(self, func):
    """
    Register func, a callable taking no arguments, to be called by
    shutDown() when the application shuts down.
    """
    handlers = self._shutDownHandlers
    handlers.append(func)
## Config ##
def defaultConfig(self):
    """
    Return a new dictionary holding the default value of every
    application setting.
    """
    config = {}

    # Start up and file serving
    config.update({
        'PrintConfigAtStartUp': 1,
        'DirectoryFile': ['index', 'Main'],
        'ExtensionsToIgnore': ['.pyc', '.pyo', '.py~', '.bak', '.tmpl'],
        'ExtensionsToServe': None,
        'UseCascadingExtensions':1,
        'ExtensionCascadeOrder':['.psp','.py','.html',],
        'FilesToHide': ['.*', '*~', '*bak', '*.tmpl', '*.pyc', '*.pyo', '*.config'],
        'FilesToServe': None,
    })

    # Activity log
    config.update({
        'LogActivity': 1,
        'ActivityLogFilename': 'Logs/Activity.csv',
        'ActivityLogColumns': ['request.remoteAddress', 'request.method', 'request.uri', 'response.size', 'servlet.name', 'request.timeStamp', 'transaction.duration', 'transaction.errorOccurred'],
    })

    # Sessions
    config.update({
        'SessionStore': 'Memory',  # can be File or Memory
        'SessionTimeout': 60,  # minutes
        'IgnoreInvalidSession': 1,
        'UseAutomaticPathSessions': 0,
    })

    # Error handling
    config.update({
        'ShowDebugInfoOnErrors': 1,
        'IncludeFancyTraceback': 0,
        'FancyTracebackContext': 5,
        'UserErrorMessage': 'The site is having technical difficulties with this page. An error has been logged, and the problem will be fixed as soon as possible. Sorry!',
        'ErrorLogFilename': 'Logs/Errors.csv',
        'SaveErrorMessages': 1,
        'ErrorMessagesDir': 'ErrorMsgs',
        'EmailErrors': 0,  # be sure to review the following settings when enabling error e-mails
        'ErrorEmailServer': 'mail.-.com',
        'ErrorEmailHeaders': {
            'From': '-@-.com',
            'To': ['-@-.com'],
            'Reply-to': '-@-.com',
            'content-type': 'text/html',
            'Subject': 'Error',
        },
        'MaxValueLengthInExceptionReport': 500,
        'RPCExceptionReturn': 'traceback',
        'ReportRPCExceptionsInWebKit': 1,
    })

    # Contexts and miscellaneous
    config.update({
        'Contexts': {
            'default': 'Examples',
            'Admin': 'Admin',
            'Examples': 'Examples',
            'Documentation': 'Documentation',
            'Testing': 'Testing',
        },
        'Debug': {
            'Sessions': 0,
        },
        'OldStyleActions': 0,
    })
    return config
def configFilename(self):
    """ Returns the server-side path of the application's config file. """
    relativeName = 'Configs/Application.config'
    return self.serverSidePath(relativeName)
def configReplacementValues(self):
    """ Returns the config replacement values of the app server. """
    server = self._server
    return server.configReplacementValues()
## Versions ##
def version(self):
    """
    Returns the version of the application as a string. This base
    implementation always answers '0.1'; subclasses should override it
    to report their real version number.
    """
    ## @@ 2000-05-01 ce: Maybe this could be a setting 'AppVersion'
    appVersion = '0.1'
    return appVersion
def webwareVersion(self):
    """
    Returns the Webware version as a tuple, as stored by initVersions().
    """
    return self._webwareVersion
def webwareVersionString(self):
    """
    Returns the Webware version as a printable string, as stored by
    initVersions().
    """
    return self._webwareVersionString
def webKitVersion(self):
    """
    Returns the WebKit version as a tuple, as stored by initVersions().
    """
    return self._webKitVersion
def webKitVersionString(self):
    """
    Returns the WebKit version as a printable string, as stored by
    initVersions().
    """
    return self._webKitVersionString
## Dispatching Requests ##
def dispatchRawRequest(self, newRequestDict, strmOut):
    """
    Creates a request object from the raw request dictionary and
    dispatches it. Returns whatever dispatchRequest() returns.
    """
    request = self.createRequestForDict(newRequestDict)
    return self.dispatchRequest(request, strmOut)
def dispatchRequest(self, request, strmOut):
    """
    Creates the transaction, session, response and servlet for the new
    request which is then dispatched. The transaction is returned.

    request -- the request object to handle; must not be None.
    strmOut -- the output stream handed to the new response.

    If the request carries a true '_captureOut_' value, sys.stdout is
    replaced by a StringIO while the request is handled and the captured
    text is appended to the response. The real sys.stdout is always put
    back, also when handling the request raises an exception.

    Exceptions raised while handling the request are passed to
    handleExceptionInTransaction() and are not propagated.
    """
    assert request is not None
    transaction = None
    captureOut = request.value('_captureOut_', 0)
    if captureOut:
        real_stdout = sys.stdout
        sys.stdout = StringIO()
    try:
        transaction = self.createTransactionForRequest(request)
        response = self.createResponseInTransaction(transaction, strmOut)
        try:
            ssPath = request.serverSidePath()
            if ssPath is None or not os.path.exists(ssPath):
                self.handleBadURL(transaction)
            elif isdir(ssPath) and noslash(request.pathInfo()): # (*) see below
                self.handleDeficientDirectoryURL(transaction)
            elif self.isSessionIdProblematic(request):
                self.handleInvalidSession(transaction)
            elif self.setting('UseAutomaticPathSessions') and not request.hasPathSession():
                self.handleMissingPathSession(transaction)
            else:
                validFile = 1
                baseName = os.path.split(ssPath)[1]
                for patternToHide in self.setting('FilesToHide'):
                    if fnmatch(baseName, patternToHide):
                        validFile = 0
                # When 'FilesToServe' is set, only files matching one of
                # its patterns are served.
                patternsToServe = self.setting('FilesToServe')
                if patternsToServe:
                    validFile = 0
                    for patternToServe in patternsToServe:
                        if fnmatch(baseName, patternToServe):
                            validFile = 1
                if not validFile:
                    self.handleBadURL(transaction)
                else:
                    self.handleGoodURL(transaction)
            if captureOut:
                capturedText = sys.stdout.getvalue()
                sys.stdout = real_stdout
                response.write('''<br><p><table><tr><td bgcolor=#EEEEEE>
<pre>%s</pre></td></tr></table>''' % capturedText)
            response.deliver()
            # (*) We have to use pathInfo() instead of uri() when looking for the trailing slash, because some webservers, notably Apache, append a trailing / to REQUEST_URI in some circumstances even though the user did not specify that (for example: http://localhost/WebKit.cgi).
        except:
            # Put the real stdout back first, so that whatever the error
            # handling prints does not vanish into the capture buffer.
            if captureOut:
                sys.stdout = real_stdout
            if debug: print("*** ERROR ***")
            if transaction:
                transaction.setErrorOccurred(1)
                self.handleExceptionInTransaction(sys.exc_info(), transaction)
                transaction.response().deliver() # I hope this doesn't throw an exception. :-) @@ 2000-05-09 ce: provide a secondary exception handling mechanism
    finally:
        # Never leave sys.stdout redirected, whatever happened above
        if captureOut:
            sys.stdout = real_stdout
    if self.setting('LogActivity'):
        self.writeActivityLog(transaction)
    path = request.serverSidePath()
    self.returnInstance(transaction, path)
    # possible circular reference, so delete it
    request.clearTransaction()
    response.clearTransaction()
    return transaction
def handleBadURL(self, transaction):
    """
    Answer the request with a 404 status and the 404 page, into which the
    requested URI is inserted.

    The URI is supplied by the client, so it is HTML-encoded before it is
    inserted into the page.
    """
    res = transaction.response()
    res.setHeader('Status', '404 Error')
    uri = transaction.request().uri()
    # '&' has to be replaced first, otherwise the other entities would be
    # encoded twice.
    for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'), ('"', '&quot;')):
        uri = uri.replace(char, entity)
    res.write(self._404Page % (uri,))
    # @@ 2000-06-26 ce: This error page is pretty primitive
    # @@ 2000-06-26 ce: We should probably load a separate template file and display that
def handleDeficientDirectoryURL(self, transaction):
# @@ 2000-11-29 gat:
# This splitting and rejoining is necessary in order to handle
# url's like http://localhost/WebKit.cgi/Examples?foo=1
# without infinite looping. I'm not sure this is the "right"
# way to do this, as it seems to contradict the docstring of
# uri(), but it works. Needs further investigation.
uri = string.split(transaction.request().uri(), '?')
uriEnd = string.split(uri[0], '/')[-1]
# @@ gat 2000-05-19: this was changed to use a relative redirect starting with "." to force
# a client redirect instead of a server redirect. This fixes problems on IIS.
uri[0] = './' + uriEnd + '/'
newURL = string.join(uri, '?')
if debug: print "* handleDeficientDirectoryURL - reditrect to",newURL
res = transaction.response()
res.setHeader('Status', '301 Redirect')
res.setHeader('Location', newURL)
res.write('''<html>
<head>
<title>301 Moved Permanently</title>
</head>
<body>
<h1>Moved Permanently</h1>
<p> The document has moved to <a href="%s">%s</a>.
</body>
</html>''' % (newURL, newURL))
def isSessionIdProblematic(self, request, debug=0):
"""
Returns 1 if there is a session id and it's not valid (either because it doesn't exist or because it has expired due to inactivity). Having no session id is not considered problematic.
This method will also expire the session if it's too old.
This method is invoked by dispatchRequest() as one of the major steps in handling requests.
"""
debug = self.setting('Debug')['Sessions']
if debug: prefix = '>> [session] isSessionIdProblematic:'
sid = request.sessionId()
if sid:
if self._sessions.has_key(sid):
if (time()-request.session().lastAccessTime()) >= request.session().timeout():
if debug: print prefix, 'session expired: %s' % repr(sid)
del self._sessions[sid]
problematic = 1
else:
problematic = 0
else:
if debug: print prefix, 'session does not exist: %s' % repr(sid)
problematic = 1
else:
problematic = 0
if debug: print prefix, 'isSessionIdProblematic =', problematic, ', id =', sid
return problematic
def handleInvalidSession(self, transaction):
res = transaction.response()
debug = self.setting('Debug')['Sessions']
if debug: prefix = '>> handleInvalidSession:'
cookie = Cookie('_SID_', '')
cookie.setPath('/')
res.addCookie(cookie)
if debug: print prefix, "set _SID_ to ''"
if self.setting('IgnoreInvalidSession'):
# Delete the session ID cookie (and field since session IDs can also
# be encoded into fields) from the request, then handle the servlet
# as though there was no session
try:
del transaction.request().cookies()['_SID_']
except KeyError:
pass
try:
transaction.request().delField('_SID_')
except KeyError:
pass
transaction.request().setSessionExpired(1)
if self.setting('UseAutomaticPathSessions'):
self.handleMissingPathSession(transaction)
else:
self.handleGoodURL(transaction)
else:
res.write('''<html> <head> <title>Session expired</title> </head>
<body> <h1>Session Expired</h1>
<p> Your session has expired and all information related to your previous working session with this site has been cleared. <p> You may try this URL again by choosing Refresh/Reload, or revisit the front page.
</body>
</html>
''')
# @@ 2000-08-10 ce: This is a little cheesy. We could load a template...
def handleMissingPathSession(self,transaction):
"""
if UseAutomaticPathSessions is enabled in Application.config
we redirect the browser to a url with SID in path
http://gandalf/a/_SID_=2001080221301877755/Examples/
_SID_ is extracted and removed from path in HTTPRequest.py
this is for convinient building of webapps that must not
depend on cookie support
"""
newSid = transaction.session().identifier()
request = transaction.request()
url = request.adapterName() + '/_SID_='+ newSid + '/' + request.pathInfo() + (request.extraURLPath() or '')
if request.queryString():
url = url + '?' + request.queryString()
if self.setting('Debug')['Sessions']:
print ">> [sessions] handling UseAutomaticPathSessions, redirecting to", url
transaction.response().sendRedirect(url)
def handleGoodURL(self, transaction):
    """
    Create the servlet for the transaction and run it through the
    awake()/respond()/sleep() cycle.

    An EndResponse raised by awake() skips both respond() and sleep(); an
    EndResponse raised by respond() still leads to sleep(). EndResponse
    is never propagated to the caller.
    """
    self.createServletInTransaction(transaction)
    try:
        self.awake(transaction)
    except EndResponse:
        return
    try:
        self.respond(transaction)
    except EndResponse:
        pass
    try:
        self.sleep(transaction)
    except EndResponse:
        pass
def processURLPath(self, req, URL):
"""
Return a URL Path relative to the current request and context.
Absolute references in the URL (starting with '/' are treated
absolute to the current context.
"""
# Construct the url path for the servlet we're calling
urlPath = req.urlPath()
if urlPath=='':
urlPath = '/'
elif urlPath[-1]=='/':
urlPath = urlPath
else:
lastSlash = string.rfind(urlPath, '/')
urlPath = urlPath[:lastSlash+1]
extraPath = ''
if URL[:1] == "/":
extraPath = req.siteRootFromCurrentServlet()
urlPath = WebUtils.Funcs.normURL(urlPath + extraPath + URL)
if debug:
print "*processURLPath(%s)=%s" % (URL, urlPath)
return urlPath
def forward(self, trans, URL):
    """
    Enable a servlet to pass a request to another servlet. The Request object is kept the same, and may be used
    to pass information to the next servlet. The next servlet may access the parent servlet through request.parent(),
    which will return the parent servlet. The first servlet will not be able to send any new response data once
    the call to forward() returns, because its response is given a dummy output stream.
    New Response and Transaction objects are created.
    Currently the URL is always relative to the existing URL.

    trans -- the transaction of the servlet that forwards the request.
    URL -- the URL to forward to; it is resolved with processURLPath().

    Nothing is returned. Any output already written to the response's
    stream is cleared before the request is dispatched again.

    NOTE: @@ sgd 2003-01-15 - presently this goes through dispatchRequest() which
    under some circumstances can result in sending a redirect() which causes the
    browser to re-get the URL. This defeats the purpose of passing information
    to a servlet in the request or transaction objects. This only happens in
    cases like a forward to a directory where no trailing / was specified.
    """
    # @@ sgd 2003-01-15
    # to fix the above warning about using dispatchRequest() consider
    # using the includeURL() code but handle the session and clearing
    # the output stream here.
    if debug: print "> forward(%s)" % str(URL)
    req = trans.request()
    urlPath = self.processURLPath(req, URL)
    # Save the original URL path so that it can be restored afterwards
    oldURL = req.urlPath()
    req.setURLPath(urlPath)
    # Add a reference to the parent servlet
    req.addParent(req.transaction()._servlet)
    # Store the session so that the new servlet can access its values
    if trans.hasSession():
        self._sessions.storeSession(trans.session())
    # We might have created a brand-new session prior to this call. If so, we need
    # to set the _SID_ identifier in the request so that the new transaction will
    # know about the new session.
    # gat 200-06-21: this feels like a hack, but it is necessary to prevent losing
    # session information.
    if trans.hasSession() and not req.hasValue('_SID_'):
        if debug: print 'Application.forward(): propagating new session ID into request'
        req.setField('_SID_', trans.session().identifier())
    # Get the output stream, discard what was written so far and hand the
    # stream to the response of the new transaction
    strmOut = req.transaction().response().streamOut()
    strmOut.clear()
    # The transaction returned by dispatchRequest() is not used any further
    newTrans = self.dispatchRequest(req, strmOut)
    # Put the request back into the state it had before the forward
    req.popParent()
    req.setURLPath(oldURL)
    # Give the old response a dummy streamout - nasty hack, better idea anyone?
    trans.response()._strmOut = ASStreamOut()
    req._transaction = trans # this is needed by dispatchRequest
    # Get rid of the session in the old transaction so it won't try to save it,
    # thereby wiping out session changes made in the servlet we forwarded to
    trans.setSession(None)
def forwardRequest(self, trans, URL):
print "forwardRequest is deprecated. Use forward()"
return self.forward(trans, URL)
def includeURL(self, trans, URL):
"""
Enable a servlet to pass a request to another servlet. This implementation
handles chaining and requestDispatch in Java.
The Request, Rssponse and Session objects are all kept the same, so the Servlet
that is called may receive information through those objects. The catch is that
the function WILL return to the calling servlet, so the calling servlet should either
take advantage of that or return immediately.
Also, if the response has already been partially sent, it can't be reversed.
"""
if debug: print "> includeURL(%s)" % str(URL)
req = trans.request()
#Save the things we're gonna change.
currentPath=req.urlPath()
currentServlet=trans._servlet
urlPath = self.processURLPath(req, URL)
req.setURLPath(urlPath)
req.addParent(currentServlet)
#Get the new servlet
self.createServletInTransaction(trans)
#call the servlet, but not session, it's already alive
try:
trans.servlet().awake(trans)
try:
trans.servlet().respond(trans)
except EndResponse:
pass
trans.servlet().sleep(trans)
except EndResponse:
pass
self.returnInstance(trans,trans.request().serverSidePath())
#replace things like they were
#trans.request()._serverSidePath=currentPath
req.setURLPath(currentPath)
req.popParent()
trans._servlet=currentServlet
def forwardRequestFast(self, trans, url):
print "forwardRequestFast is deprecated. Use includeURL()"
return self.includeURL(trans, url)
def callMethodOfServlet(self, trans, URL, method, *args, **kwargs):
"""
Enable a servlet to call a method of another servlet. Note: the servlet's awake() is called,
then the method is called with the given arguments, then sleep() is called. The result
of the method call is returned.
"""
req = trans.request()
if debug: print "> callMethodOfServlet(%s, %s)" % (URL, method)
# Save the current url path and servlet
currentPath = req.urlPath()
currentServlet = trans._servlet
urlPath = self.processURLPath( req, URL )
# Modify the request to use the new URL path
req.setURLPath(urlPath)
# Add the current servlet as a parent
req.addParent(currentServlet)
# Get the new servlet
self.createServletInTransaction(trans)
# Awaken, call the method, and sleep
servlet = trans.servlet()
try:
servlet.awake(trans)
try:
result = getattr(servlet, method)(*args, **kwargs)
except EndResponse:
pass
servlet.sleep(trans)
except EndResponse:
pass
# Return the servlet instance to the cache
self.returnInstance(trans, trans.request().serverSidePath())
# Replace things like they were
req.setURLPath(currentPath)
req.popParent()
trans._servlet=currentServlet
# Done
return result
## Transactions ##
def awake(self, transaction):
    """ Passes the awake() message on to the transaction. """
    transaction.awake()
def respond(self, transaction):
    """ Passes the respond() message on to the transaction. """
    transaction.respond()
def sleep(self, transaction):
    """
    Passes the sleep() message on to the transaction and then, if the
    transaction has a session, stores that session in the session store.
    """
    transaction.sleep()
    if not transaction.hasSession():
        return
    # Store the session
    self._sessions.storeSession(transaction.session())
## Sessions ##
def session(self, sessionId, default=NoDefault):
    """
    Returns the session with the given id from the session store. If the
    session does not exist, default is returned when it was passed;
    otherwise the lookup in the session store fails.
    """
    if default is not NoDefault:
        return self._sessions.get(sessionId, default)
    return self._sessions[sessionId]
def hasSession(self, sessionId):
    """ Returns whether the session store has a session with the given id. """
    store = self._sessions
    return store.has_key(sessionId)
def sessions(self):
    """ Returns the session store of the application. """
    return self._sessions
## Misc Access ##
def server(self):
    """ Returns the app server that was passed to __init__(). """
    return self._server
def serverSidePath(self, path=None):
    """
    Returns the absolute server-side path of the WebKit application. If
    the optional path is passed in, it is joined with the server-side
    directory and the normalized result is returned instead.
    """
    if not path:
        return self._serverSidePath
    joined = os.path.join(self._serverSidePath, path)
    return os.path.normpath(joined)
def webwarePath(self):
    """ Returns the Webware path as reported by the app server. """
    server = self._server
    return server.webwarePath()
def webKitPath(self):
    """ Returns the WebKit path as reported by the app server. """
    server = self._server
    return server.webKitPath()
def name(self):
    """ Returns the name of the application, which is sys.argv[0]. """
    programName = sys.argv[0]
    return programName
def transactionClass(self):
    """ Returns the class that is used to create transactions. """
    return self._transactionClass
def setTransactionClass(self, newClass):
    """
    Sets the class that is used to create transactions. newClass has to
    be a class.
    """
    assert isclass(newClass)
    setattr(self, '_transactionClass', newClass)
def responseClass(self, newClass=None):
    """
    Returns the class that is used to create responses.

    newClass is ignored. It is only accepted to stay compatible with
    callers written against the earlier signature, which required that
    argument although this method is a plain accessor.
    """
    return self._responseClass
def setResponseClass(self, newClass):
    """
    Sets the class that is used to create responses. newClass has to be
    a class.
    """
    assert isclass(newClass)
    setattr(self, '_responseClass', newClass)
## Contexts ##
def context(self, name, default=NoDefault):
    """
    Returns the value of the specified context. If there is no context
    of that name, default is returned when it was passed; otherwise the
    dictionary lookup fails.
    """
    if default is not NoDefault:
        return self._contexts.get(name, default)
    return self._contexts[name]
def hasContext(self, name):
    """ Returns whether a context of the given name is known. """
    knownContexts = self._contexts
    return knownContexts.has_key(name)
def _setContext(self, name, value):#use addContext
if self._contexts.has_key(name):
print 'WARNING: Overwriting context %s (=%s) with %s' % (
repr(name), repr(self._contexts[name]), repr(value))
self._contexts[name] = value
def contexts(self):
    """ Returns the dictionary that maps context names to their values. """
    return self._contexts
def addContext(self, name, dir):
if self._contexts.has_key(name):
print 'WARNING: Overwriting context %s (=%s) with %s' % (
repr(name), repr(self._contexts[name]), repr(dir))
__contextInitialized = 1 # Assume already initialized.
else:
__contextInitialized = 0
try:
importAsName = name
localdir, pkgname = os.path.split(dir)
if sys.modules.has_key(importAsName):
mod = sys.modules.get(importAsName)
else:
res = imp.find_module(pkgname, [localdir])
mod = imp.load_module(name, res[0], res[1], res[2])
__contextInitialized = 0 # overwriting context - re-initialize
except ImportError,e:
print "Error loading context: %s: %s: dir=%s" % (name, e, dir)
return
if not __contextInitialized and mod.__dict__.has_key('contextInitialize'):
result = mod.__dict__['contextInitialize'](self,
os.path.normpath(os.path.join(os.getcwd(),dir)))
if result != None and result.has_key('ContentLocation'):
dir = result['ContentLocation']
print 'Loading context: %s at %s' % (name, dir)
self._contexts[name] = dir
## Factory access ##
def addServletFactory(self, factory):
    """
    Register a servlet factory: the factory is added to the list of
    factories and recorded as the factory for each of its extensions.

    factory -- an instance of ServletFactory.

    An assertion fails if one of the factory's extensions has already
    been claimed by a factory that was registered earlier.
    """
    assert isinstance(factory, ServletFactory)
    self._factoryList.append(factory)
    for ext in factory.extensions():
        # The message names the factory being added first and the factory
        # that already owns the extension second.
        assert not self._factoryByExt.has_key(ext), 'Extension (%s) for factory (%s) was already used by factory (%s)' % (ext, factory.name(), self._factoryByExt[ext].name())
        self._factoryByExt[ext] = factory
def factories(self):
    """ Returns the list of registered servlet factories. """
    return self._factoryList
## Activity Log ##
def writeActivityLog(self, transaction):
    """
    Writes an entry for the transaction to the activity log file. Uses
    the settings ActivityLogFilename and ActivityLogColumns.

    If the log file does not exist yet, it is created and a header line
    with the column names is written first. A column whose value cannot
    be determined is logged as '(unknown)'. The log file is closed again
    in every case, also when writing fails.
    """
    columns = self.setting('ActivityLogColumns')
    # We use UserDict on the next line because we know it inherits NamedValueAccess and reponds to valueForName()
    objects = UserDict({
        'application': self,
        'transaction': transaction,
        'request': transaction.request(),
        'response': transaction.response(),
        'servlet': transaction.servlet(),
        'session': transaction._session, #don't cause creation of session
    })
    try:
        # Build the complete row before the file is touched
        values = []
        for column in columns:
            try:
                value = objects.valueForName(column)
            except:
                # Deliberately broad: logging is best effort and a
                # column that cannot be looked up must not fail the
                # request.
                value = '(unknown)'
            if type(value) is FloatType:
                value = '%0.2f' % value # probably need more flexibility in the future
            else:
                value = str(value)
            values.append(value)
        filename = self.serverSidePath(self.setting('ActivityLogFilename'))
        if os.path.exists(filename):
            isNewFile = 0
            logFile = open(filename, 'a')
        else:
            isNewFile = 1
            logFile = open(filename, 'w')
        try:
            if isNewFile:
                logFile.write(string.join(columns, ',')+'\n')
            logFile.write(string.join(values, ',')+'\n')
        finally:
            logFile.close()
    finally:
        # Drop the references again
        for i in objects.keys():
            objects[i]=None
## Utilities/Hooks ##
def createRequestForDict(self, newRequestDict):
    """
    Creates and returns a request object for the raw request dictionary,
    using the request class of the application.
    """
    requestClass = self._requestClass
    return requestClass(dict=newRequestDict)
def createTransactionForRequest(self, request):
    """
    Creates a transaction for the request, makes the transaction known
    to the request and returns the transaction.
    """
    transactionClass = self._transactionClass
    newTransaction = transactionClass(application=self, request=request)
    request.setTransaction(newTransaction)
    return newTransaction
def createResponseInTransaction(self, transaction, strmOut):
    """
    Creates a response for the transaction that writes to strmOut, sets
    it as the response of the transaction and returns it.
    """
    responseClass = self._responseClass
    newResponse = responseClass(transaction, strmOut)
    transaction.setResponse(newResponse)
    return newResponse
def createSessionForTransaction(self, transaction):
debug = self.setting('Debug')['Sessions']
if debug: prefix = '>> [session] createSessionForTransaction:'
sessId = transaction.request().sessionId()
if debug: print prefix, 'sessId =', sessId
if sessId:
session = self.session(sessId)
if debug: print prefix, 'retrieved session =', session
else:
session = self._sessionClass(transaction)
self._sessions[session.identifier()] = session
if debug: print prefix, 'created session =', session
transaction.setSession(session)
return session
def getServlet(self, transaction, path, cache=None): #send the cache if you want the cache info set
    """
    Asks the servlet factory responsible for the extension of path for a
    servlet for the transaction and returns that servlet.

    If no factory is registered for the extension, the catch-all factory
    registered for '.*' is used. ApplicationError is raised if there is
    no such factory either.

    If a cache dictionary is passed in, its 'threadsafe' and 'reuseable'
    entries are set from the servlet.
    """
    ext = os.path.splitext(path)[1]
    factory = self._factoryByExt.get(ext, None)
    if not factory:
        # special case: .* is the catch-all
        factory = self._factoryByExt.get('.*', None)
    if not factory:
        raise ApplicationError('Unknown extension (%s). No factory found.' % ext)
        # ^ @@ 2000-05-03 ce: Maybe the web browser doesn't want an exception for bad extensions. We probably need a nicer message to the user...
        #                     On the other hand, that can always be done by providing a factory for '.*'
    assert factory.uniqueness()=='file', '%s uniqueness is not supported.' % factory.uniqueness()
    # @@ 2001-05-10 gat: the directory of the servlet is deliberately not
    # added to sys.path, because that allows 2 different copies of the
    # same module to be imported, one as "foo" and one as "context.foo".
    servlet = factory.servletForTransaction(transaction)
    assert servlet is not None, 'Factory (%s) failed to create a servlet upon request.' % factory.name()
    if cache:
        cache['threadsafe']=servlet.canBeThreaded()
        cache['reuseable']=servlet.canBeReused()
    return servlet
def returnInstance(self, transaction, path):
    """
    Puts the servlet of the transaction back into the instance cache for
    path. This is only done for servlets that are reuseable but not
    threadsafe; in every other case nothing happens.
    """
    cache = self._servletCacheByPath.get(path, None)
    if not cache:
        return
    if not cache['reuseable'] or cache['threadsafe']:
        return
    if transaction.servlet():
        cache['instances'].append(transaction.servlet())
def newServletCacheItem(self,key,item):
    """
    Adds a new item to the main servlet cache under the given key. No
    lock is taken; the locking calls are commented out.
    """
    #self._cacheDictLock.acquire()
    servletCache = self._servletCacheByPath
    servletCache[key] = item
    #self._cacheDictLock.release()
def flushServletCache(self):
    """ Replaces the servlet cache by a new, empty dictionary. """
    emptyCache = {}
    self._servletCacheByPath = emptyCache
def createServletInTransaction(self, transaction):
    """
    Find or create the servlet for the transaction's request and attach
    it to the transaction with setServlet().

    A cache entry per server-side path records the file timestamp, the
    pooled instances and whether the servlet is threadsafe / reuseable.
    An entry older than the file on disk is discarded.
    """
    servletPath = transaction.request().serverSidePath()
    assert servletPath is not None

    servlet = None
    entry = None

    # Look in the cache, dropping the entry if the file has changed since
    if self._cacheServletInstances:
        entry = self._servletCacheByPath.get(servletPath, None)
        if entry and entry['timestamp'] < os.path.getmtime(servletPath):
            del entry['instances'][:]
            entry = None

    if not entry:
        # Nothing usable is cached: register a fresh entry and let
        # getServlet() record the threadsafe/reuseable flags in it.
        entry = {
            'instances': [],
            'path': servletPath,
            'timestamp': os.path.getmtime(servletPath),
            'threadsafe': 0,
            'reuseable': 0,
        }
        self.newServletCacheItem(servletPath, entry)
        servlet = self.getServlet(transaction, servletPath, entry)
        if entry['threadsafe']:
            # special case, put in the cache now
            entry['instances'].append(servlet)
    elif not entry['reuseable']:
        # One time servlet: always build a new one
        servlet = self.getServlet(transaction, servletPath)
    elif not entry['threadsafe']:
        # Reuseable but not threadsafe: take an idle instance from the
        # pool, or make another one if the pool is empty
        try:
            servlet = entry['instances'].pop()
        except IndexError:
            servlet = self.getServlet(transaction, servletPath)
    else:
        # Reuseable and threadsafe: share the single cached instance
        # without removing it
        servlet = entry['instances'][0]

    transaction.setServlet(servlet)
def handleExceptionInTransaction(self, excInfo, transaction):
    """
    Hand the exception described by `excInfo` to the configured exception
    handler class, together with this application and the transaction.
    ExceptionHandler is installed as the handler class if none is set.
    """
    handlerClass = self._exceptionHandlerClass
    if handlerClass is None:
        handlerClass = self._exceptionHandlerClass = ExceptionHandler
    handlerClass(self, transaction, excInfo)
def handleException(self, excInfo=None):
    """
    Handle an exception that occurred outside of a transaction by calling
    the configured exception handler class with a transaction of None.

    If `excInfo` is not given, sys.exc_info() is used. Note that the
    exception handler must be capable of taking a transaction of None.
    ExceptionHandler is installed as the handler class if none is set.
    """
    info = excInfo
    if info is None:
        info = sys.exc_info()
    if self._exceptionHandlerClass is None:
        self._exceptionHandlerClass = ExceptionHandler
    handlerClass = self._exceptionHandlerClass
    handlerClass(self, None, info)
def filenamesForBaseName(self, baseName):
    """
    Return the list of existing files named baseName plus some extension.

    Files whose extension is listed in the setting ExtensionsToIgnore are
    left out, and when the setting ExtensionsToServe is non-empty only
    files with one of those extensions are kept. A baseName containing
    '*' yields an empty list. This utility method is used by
    serverSideInfoForRequest().

    Example: '/a/b/c' could yield ['/a/b/c.py', '/a/b/c.html'], but will
    never yield a '/a/b/c.pyc' filename if .pyc files are ignored.
    """
    if '*' in baseName:
        return []

    ignored = self.setting('ExtensionsToIgnore')
    # Idea for later: also skip names ending in '~', because CVS leaves
    # files with extensions like '*.py.~1.2.3~'.
    # @@ 2000-06-22 ce: the extension test is a linear search
    matches = [name for name in glob(baseName + '.*')
        if os.path.splitext(name)[1] not in ignored]

    wanted = self.setting('ExtensionsToServe')
    if wanted:
        matches = [name for name in matches
            if os.path.splitext(name)[1] in wanted]

    if debug:
        print('>> filenamesForBaseName(%s) returning %s' % (
            repr(baseName), repr(matches)))
    return matches
def defaultContextNameAndPath(self):
    """
    Returns the default context name and path in a tuple.

    If there's an explicitly named context with the same path as the
    "default" context, then we'll use that name instead. Otherwise, we'll
    just use "default" as the name. The name is determined once and then
    remembered in self._defaultContextName.
    """
    if not self._defaultContextName:
        defaultContextPath = self._contexts['default']
        for contextName, contextPath in self._contexts.items():
            # The 'default' entry trivially has the default path, so it
            # must be skipped; otherwise it could win over the explicitly
            # named context depending on dictionary ordering.
            if contextName != 'default' and contextPath == defaultContextPath:
                self._defaultContextName = contextName
                break
        else:
            self._defaultContextName = 'default'
    return self._defaultContextName, self.context(self._defaultContextName)
def serverSideInfoForRequest(self, request):
    """
    Returns a tuple (requestPath, contextPath, contextName) where requestPath is
    the server-side path of this request, contextPath is the
    server-side path of the context for this request, and contextName is the
    name of the context, which is not necessarily the same as the name
    of the directory that houses the context.

    This is a 'private' service method for use by HTTPRequest.

    Returns (None, None, None) if there is no corresponding server side path for the URL.

    This method supports:
        * Contexts
        * A default context
        * Auto discovery of directory vs. file
        * For directories, auto discovery of file, configured by DirectoryFile
        * For files, auto discovery of extension, configured by ExtensionsToIgnore
        * Rejection of files (not directories) that end in a slash (/)
        * "Extra path" URLs where the servlet is actually embedded in the path
          as opposed to being at the end of it. (ex: http://foo.com/servlet/extra/path).
          The ExtraPath information will be available through request.extraPathInfo().
          The Application.config file must have ExtraPathInfo set to 1 for this to be functional.

    Side effects: successful lookups are stored in
    self._serverSideInfoCacheByPath; when ExtraPathInfo is set, the
    request's URL path and _extraURLPath are updated.

    IF YOU CHANGE THIS VERY IMPORTANT, SUBTLE METHOD, THEN PLEASE REVIEW
    AND COMPLETE http://localhost/WebKit.cgi/Testing/ BEFORE CHECKING IN
    OR SUBMITTING YOUR CHANGES.
    """
    # Local switch for the trace output below; it shadows the
    # module-level debug flag inside this method.
    debug=0
    extraURLPath=''
    urlPath = request.urlPath()
    if debug: print '>> urlPath =', repr(urlPath)

    ##if the requested file is in the filesystem outside of any context...
    if request._absolutepath:
        # Here urlPath is used directly as a filesystem path.
        if isdir(urlPath):
            urlPath = self.findDirectoryIndex(urlPath, debug)
        return urlPath, None, None #no contextpath, no contextname

    # try the cache first
    ssPath, contextPath, contextName = self._serverSideInfoCacheByPath.get(urlPath, (None, None, None))
    if ssPath is not None:
        if debug: print '>> returning path from cache: %s' % repr(ssPath)
        return ssPath, contextPath, contextName

    # case: no URL then use the default context
    if urlPath=='' or urlPath=='/':
        contextName, ssPath = self.defaultContextNameAndPath()
        if debug:
            print '>> no urlPath, so using default context %s at path: %s' % (contextName, ssPath)
    else:
        # Check for and process context name:
        assert urlPath[0]=='/', 'urlPath=%s' % repr(urlPath)
        if string.rfind(urlPath, '/')>0: # there is another / in the url besides the leading one
            blank, contextName, restOfPath = string.split(urlPath, '/', 2)
        else:
            # only the leading /: the whole url is the candidate context name
            contextName, restOfPath = urlPath[1:], ''
        if debug: print '>> contextName=%s, restOfPath=%s' % (repr(contextName), repr(restOfPath))

        # Look for context
        try:
            prepath = self._contexts[contextName]
        except KeyError:
            restOfPath = urlPath[1:] # put the old path back, there's no context here
            contextName, prepath = self.defaultContextNameAndPath()
            if debug:
                print '>> context not found so assuming default:'
        if debug: print '>> ContextName=%s, prepath=%s, restOfPath=%s' % (contextName, repr(prepath), repr(restOfPath))
        #ssPath = os.path.join(prepath, restOfPath)
        if restOfPath != '':
            ssPath = prepath + os.sep + restOfPath
        else:
            ssPath = prepath

    if debug: print ">> ssPath= %s" % ssPath

    contextPath = self._contexts[contextName]

    # Remember the last character before normalizing (see below).
    lastChar = ssPath[-1]
    ssPath = os.path.normpath(ssPath)

    # 2000-07-06 ce: normpath() chops off a trailing / (or \)
    # which is NOT what we want. This makes the test case
    # http://localhost/WebKit.cgi/Welcome/ pass when it should
    # fail. URLs that name files must not end in slashes because
    # relative URLs in the resulting document will get appended
    # to the URL, instead of replacing the last component.
    if lastChar=='\\' or lastChar=='/':
        if debug: print "lastChar was %s" % lastChar
        ssPath = ssPath + os.sep

    if debug: print '>> normalized ssPath =', repr(ssPath)

    if self.setting('ExtraPathInfo'): #check for extraURLPath
        # Split off any extra path info; urlPath and ssPath come back
        # truncated to the part that names the servlet.
        ssPath, urlPath, extraURLPath = self.processExtraURLPath(ssPath, urlPath, debug)
        request.setURLPath(urlPath)
        request._extraURLPath = extraURLPath
    ##Finish extraURLPath checks

    ##Check cache again (urlPath may have been shortened above)
    cachePath, cacheContextPath, cacheContextName = self._serverSideInfoCacheByPath.get(urlPath, (None, None, None))
    if cachePath is not None:
        if debug:
            print 'checked cache for urlPath %s' % urlPath
            print '>> returning path for %s from cache: %s' % (repr(ssPath), repr(cachePath))
        return cachePath, cacheContextPath, cacheContextName

    if isdir(ssPath):
        # URLs that map to directories need to have a trailing slash.
        # If they don't, then relative links in the web page will not be
        # constructed correctly by the browser.
        # So in the following if statement, we're bailing out for such URLs.
        # dispatchRequest() will detect the situation and handle the redirect.
        if debug: print ">> ssPath is a directory"
        if extraURLPath == '' and (urlPath=='' or urlPath[-1]!='/'):
            if debug:
                print '>> BAILING on directory url: %s' % repr(urlPath)
            # Note: this result is returned without being cached.
            return ssPath, contextPath, contextName
        ssPath = self.findDirectoryIndex(ssPath, debug)

    elif os.path.splitext(ssPath)[1]=='':
        # At this point we have a file (or a bad path)
        # without an extension, so try to discover the extension.
        filenames = self.filenamesForBaseName(ssPath)
        if len(filenames)==1:
            ssPath = filenames[0]
            if debug: print '>> discovered extension, file = %s' % repr(ssPath)
        elif len(filenames) > 1:
            # Several candidates: optionally pick one by the configured
            # extension cascade order, otherwise give up.
            foundMatch = 0
            if self.setting('UseCascadingExtensions'):
                for ext in self.setting('ExtensionCascadeOrder'):
                    if (ssPath + ext) in filenames:
                        ssPath = ssPath + ext
                        foundMatch = 1
                        break
            if not foundMatch:
                print 'WARNING: For %s, did not get precisely 1 filename: %s' %\
                    (urlPath, filenames)
                return None, None, None
        else:
            return None, None, None

    elif not os.path.isfile(ssPath):
        # An extension was given, but no such file exists.
        return None, None, None

    # Remember the result for the next request of the same URL path.
    self._serverSideInfoCacheByPath[urlPath] = ssPath, contextPath, contextName

    if debug:
        print '>> returning %s, %s, %s\n' % (repr(ssPath), repr(contextPath), repr(contextName))
    return ssPath, contextPath, contextName
def findDirectoryIndex(self, ssPath, debug=0):
    """
    Given the server-side path of a directory, find the index file in
    that directory.

    Each base name in the setting DirectoryFile is tried in order using
    filenamesForBaseName(). Returns the path of the first file found, or
    None if no directory file exists or if a base name matches more than
    one file (a warning is printed in that case).
    """
    if debug: print('>> directory = %s' % repr(ssPath))

    # Start out empty so that an empty DirectoryFile setting simply
    # results in "no directory file" instead of an unbound local.
    filenames = []
    for dirFilename in self.setting('DirectoryFile'):
        filenames = self.filenamesForBaseName(os.path.join(ssPath, dirFilename))
        num = len(filenames)
        if num == 1:
            break  # we found a file to handle the directory
        elif num > 1:
            print('WARNING: the directory is %s which contains more than 1 directory file: %s' % (ssPath, filenames))
            return None

    if not filenames:
        if debug: print('WARNING: For %s, the directory contains no directory file.' % (ssPath))
        return None

    ssPath = filenames[0]  # our path now includes the filename within the directory
    if debug: print('>> discovered directory file = %s' % repr(ssPath))
    return ssPath
def processExtraURLPath(self, ssPath, urlPath, debug=0):
    """
    Given a server side path (ssPath) and the original request URL
    (urlPath), determine which portion of the URL is a request path and
    which portion is extra request information.

    Return a tuple of:
        ssPath: the corrected (truncated) ssPath,
        urlPath: the corrected (truncated) urlPath,
        extraPathInfo: the extra path info ('' if there is none,
            otherwise it starts with '/')
    """
    extraURLPath = ''
    if debug: print("*** processExtraURLPath starting for ssPath= %s" % ssPath)

    if os.path.exists(ssPath):  # bail now if the whole thing exists
        if debug: print("*** entire ssPath exists")
        return ssPath, urlPath, ''

    if debug: print("starting ssPath=%s, urlPath=%s " % (ssPath, urlPath))

    goodindex = 0  # this marks the last point where the path exists

    index = ssPath.find(os.sep)
    if index == -1:
        # bail if no seps found: there is nothing to split
        return ssPath, urlPath, extraURLPath
    if not index:
        index = 1  # start with at least one character

    # Walk the path one component at a time for as long as the leading
    # part exists on disk.
    if debug: print("testing  %s" % ssPath[:index])
    while index != -1 and os.path.exists(ssPath[:index]):
        goodindex = index
        index = ssPath.find(os.sep, index + 1)
        if debug: print("testing  %s" % ssPath[:index])

    if debug: print("quitting loop with goodindex=  %s" % ssPath[:goodindex])

    if index != -1:
        # there is another slash, but we already know the path up to it is invalid
        if debug: print("last loop stopped before the end of ssPath")
        searchpath = ssPath[:index]
    else:
        # no more slashes, so the last element is either a file without
        # an extension, or the real URL is a directory and the last
        # piece is extra URL info
        searchpath = ssPath

    # Now test to see if the next element is a file without an extension
    filenames = self.filenamesForBaseName(searchpath)
    if debug: print("found %s valid files" % len(filenames))

    if len(filenames) > 0:
        # NOTE(review): nothing is split off here even when index != -1,
        # i.e. when more path follows the matched file -- confirm that
        # this is intended.
        extralen = 0
    else:
        extralen = len(ssPath) - goodindex
        if isdir(ssPath[:goodindex]):
            extralen = extralen - 1  # leave the last slash on the path

    if extralen > 0:
        urlPath, extraURLPath = urlPath[:-extralen], urlPath[-extralen:]
        ssPath = ssPath[:-extralen]

    if extraURLPath and extraURLPath[0] != '/':
        extraURLPath = '/' + extraURLPath

    if debug: print("processExtraURLPath returning %s, %s, %s" % (ssPath, urlPath, extraURLPath))
    return ssPath, urlPath, extraURLPath
def writeExceptionReport(self, handler):
    """
    Hook for adding application information to exception reports.

    Intentionally does nothing: nothing particularly useful needs to be
    added to the exception reports by the Application.
    See ExceptionHandler.py for more info.
    """
    return None
## New Path Algorithm ##
def serverSideInfoForRequestNewAlgorithm(self, request):
    """
    Returns a tuple (requestPath, contextPath,
    contextName) where requestPath is the server-side path
    of this request, contextPath is the server-side path
    of the context for this request, and contextName is
    the name of the context, which is not necessarily the
    same as the name of the directory that houses the
    context.

    Returns (None, None, None) if there is no
    corresponding server side path for the URL.

    As a side effect, the extra URL path reported by findServlet()
    is stored in request._extraURLPath.
    """
    fullPath = request.urlPath()
    contextPath, contextName, rest = self.findContext(fullPath)
    servletPath, extraPath = self.findServlet(contextPath, rest)
    request._extraURLPath = extraPath
    if debug:
        print("> ssifr na: %s" % ((servletPath, contextPath, contextName, extraPath, request.urlPath()),))
    if servletPath is None:
        # Honor the documented contract for "not found" rather than
        # leaking the context of a servlet that does not exist.
        return (None, None, None)
    return (servletPath, contextPath, contextName)
def findContext(self, fullPath):
    """
    Internal method: returns (contextPath, contextName, restOfPath)

    The first component of fullPath is looked up as a context name. If it
    names a context, restOfPath is what follows it, starting with a /.
    Otherwise (or when fullPath is empty or just '/') the default context
    is returned together with the unchanged fullPath.
    """
    assert not fullPath or fullPath[0] == '/'
    if fullPath and fullPath != '/':
        pieces = fullPath.split('/', 2)
        count = len(pieces)
        if count == 3:
            candidate, remainder = pieces[1], pieces[2]
        elif count == 2:
            candidate, remainder = pieces[1], ''
        else:
            candidate, remainder = '', ''
        try:
            knownPath = self._contexts[candidate]
        except KeyError:
            pass  # not a context name, fall back to the default context
        else:
            return (knownPath, candidate, '/' + remainder)
    contextName, contextPath = self.defaultContextNameAndPath()
    return (contextPath, contextName, fullPath)
def findServlet(self, contextPath, urlPath):
    """
    Internal method: returns (servletPath, extraURLPath)

    extraURLPath will start with '/' (unless no extraURLPath
    was given, in which case extraURLPath will be '').
    Returns (None, None) if no servlet can be found, or if more than one
    file matches (a warning is printed in that case).

    The cache self._serverSideInfoCacheByPathNew is consulted first, for
    the whole urlPath and then for successively shorter prefixes of it.
    After that the components of urlPath are walked down from
    contextPath on the filesystem.
    """
    cache = self._serverSideInfoCacheByPathNew
    try:
        return (cache[urlPath], '')
    except KeyError:
        pass

    parts = urlPath.split('/')
    # Longest prefix first. The scan starts at 1 because parts[:-0] is
    # the empty list, not the full list: starting at 0 would try the
    # root entry '' before every longer prefix.
    for i in range(1, len(parts)):
        url = '/'.join(parts[:-i])
        try:
            return cache[url], '/' + '/'.join(parts[-i:])
        except KeyError:
            pass

    currentPath = contextPath
    while 1:
        if not parts:
            # The whole URL named directories: serve the directory index
            filename = self.findDirectoryIndex(currentPath)
            if filename:
                return (filename, '')
            else:
                return None, None  # 404 Not Found
        first = parts[0]
        candidate = os.path.join(currentPath, first)
        # Use this file's isdir() helper rather than os.path.isdir():
        # for an empty component the joined path ends in a separator.
        if isdir(candidate):
            currentPath = candidate
            parts = parts[1:]
            continue
        filenames = self.filenamesForBaseNameNew(candidate)
        if filenames:
            if len(filenames) == 1:
                return (filenames[0],
                    '/' + '/'.join(parts[1:]))
            print("WARNING: More than one file matches basename %s (%s)" % (repr(candidate), filenames))
            return None, None
        else:
            # No such file: let the directory index handle the rest of
            # the URL as extra path
            filename = self.findDirectoryIndex(currentPath)
            if filename:
                return (filename, '/' + '/'.join(parts))
            return None, None
def filenamesForBaseNameNew(self, baseName):
    """
    Return the list of servable files for baseName: the file baseName
    itself and/or files named baseName plus an extension.

    Files are left out if
        * their extension is in the setting ExtensionsToIgnore (unless
          the file is exactly baseName),
        * ExtensionsToServe is non-empty and does not list their
          extension,
        * their name matches one of self._filesToHideRegexes, or
        * self._filesToServeRegexes is non-empty and none of them
          matches their name.

    If more than one file remains and UseCascadingExtensions is set, the
    setting ExtensionCascadeOrder is used to pick a single file.
    A baseName containing '*' yields an empty list.
    """
    if '*' in baseName:
        return []

    toIgnore = self.setting('ExtensionsToIgnore')
    toServe = self.setting('ExtensionsToServe')
    withDot = baseName + '.'
    good = []
    for filename in glob(baseName + "*"):
        # The trailing '*' of the pattern also matches longer sibling
        # names ('foobar.py' for 'foo'); only accept baseName itself or
        # baseName followed by an extension.
        if filename != baseName and not filename.startswith(withDot):
            continue
        ext = os.path.splitext(filename)[1]
        shortFilename = os.path.basename(filename)
        if ext in toIgnore and filename != baseName:
            continue
        if toServe and ext not in toServe:
            continue
        # A flag is needed here: a bare 'continue' inside the regex loop
        # would only advance that inner loop and never hide the file.
        hidden = 0
        for regex in self._filesToHideRegexes:
            if regex.match(shortFilename):
                hidden = 1
                break
        if hidden:
            continue
        if self._filesToServeRegexes:
            shouldServe = 0
            for regex in self._filesToServeRegexes:
                if regex.match(shortFilename):
                    shouldServe = 1
                    break
            if not shouldServe:
                continue
        good.append(filename)

    if len(good) > 1 and self.setting('UseCascadingExtensions'):
        actualExtension = os.path.splitext(baseName)[1]
        for extension in self.setting('ExtensionCascadeOrder'):
            # NOTE(review): when extension == actualExtension this returns
            # baseName + extension (e.g. 'a.py.py'), not baseName --
            # confirm that this is intended.
            if baseName + extension in good \
                    or extension == actualExtension:
                return [baseName + extension]
    return good
## Deprecated ##
def serverSidePathForRequest(self, request, debug=0):
    """
    This is maintained for backward compatibility; it just returns the
    first part of the tuple returned by serverSideInfoForRequest.

    The debug argument is accepted for backward compatibility only and
    is ignored: serverSideInfoForRequest() takes no such argument.
    """
    self.deprecated(self.serverSidePathForRequest)
    return self.serverSideInfoForRequest(request)[0]
def serverDir(self):
    """
    Returns the directory where the application server is located.

    deprecated: Application.serverDir() on 1/24 in ver 0.5, use
    serverSidePath() instead. The call is reported through
    self.deprecated() and then forwarded to serverSidePath().
    """
    thisMethod = self.serverDir
    self.deprecated(thisMethod)
    return self.serverSidePath()
def isdir(s):
    """
    Return whether s is the path of an existing directory.

    *** Be sure to use this isdir() function rather than os.path.isdir()
    in this file.

    2000-07-06 ce: Only on Windows, does an isdir() call with a
    path ending in a slash fail to return 1. e.g.,
    isdir('C:\\tmp\\')==0 while on UNIX isdir('/tmp/')==1.
    So on Windows a trailing separator is removed before testing.
    """
    path = s
    if os.name == 'nt' and s and s[-1] == os.sep:
        path = s[:-1]
    return os.path.isdir(path)
def noslash(s):
    """ Return true if s is blank or does not end in /. A little utility for dispatchRequest(). """
    return s[-1:] != '/'
def main(requestDict):
    """
    Returns a raw response. This method is mostly used by OneShotAdapter.py.

    A new Application (without session sweeper) is created to dispatch
    the raw request in requestDict, which must be a dictionary. If
    anything goes wrong, a dictionary with 'headers' and 'contents'
    describing the exception as an HTML page is returned instead.
    """
    from WebUtils.HTMLForException import HTMLForException
    try:
        assert type(requestDict) is type({})
        app = Application(useSessionSweeper=0)
        return app.dispatchRawRequest(requestDict).response().rawResponse()
    except:
        # Deliberate catch-all at the outermost boundary: whatever
        # happened is reported to the client as an HTML page.
        return {
            'headers': [('Content-type', 'text/html')],
            # The closing tags are emitted in properly nested order.
            'contents': '<html><body>%s</body></html>' % HTMLForException()
        }
# You can run Application as a main script, in which case it expects a single
# argument which is a file containing a dictionary representing a request. This
# technique isn't very popular as Application itself could raise exceptions
# that aren't caught. See CGIAdapter.py and AppServer.py for a better example of
# how things should be done.
if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.stderr.write('WebKit: Application: Expecting one filename argument.\n')
        # Stop here rather than stumbling on into sys.argv[1] below.
        sys.exit(1)
    requestFile = open(sys.argv[1])
    try:
        # NOTE: eval() executes whatever code the file contains; only run
        # this on request files from a trusted source.
        requestDict = eval(requestFile.read())
    finally:
        requestFile.close()
    main(requestDict)