"""HTML 5 conformance checker Warning: this module is experimental, incomplete, and subject to removal at any time. Usage: >>> from html5lib.html5parser import HTMLParser >>> from html5lib.filters.validator import HTMLConformanceChecker >>> p = HTMLParser(tokenizer=HTMLConformanceChecker) >>> p.parse('\n') < None> >>> p.errors [((2, 14), 'unknown-attribute', {'attributeName': u'foo', 'tagName': u'html'})] """ try: frozenset except NameError: # Import from the sets module for python 2.3 from sets import Set as set from sets import ImmutableSet as frozenset import _base import iso639codes import rfc3987 import rfc2046 from html5lib.constants import E, spaceCharacters, digits, tokenTypes from html5lib import tokenizer import gettext _ = gettext.gettext E.update({ "unknown-start-tag": _(u"Unknown start tag <%(tagName)s>."), "unknown-attribute": _(u"Unknown '%(attributeName)s' attribute on <%(tagName)s>."), "missing-required-attribute": _(u"The '%(attributeName)s' attribute is required on <%(tagName)s>."), "unknown-input-type": _(u"Illegal value for attribute on ."), "attribute-not-allowed-on-this-input-type": _(u"The '%(attributeName)s' attribute is not allowed on ."), "deprecated-attribute": _(u"This attribute is deprecated: '%(attributeName)s' attribute on <%(tagName)s>."), "duplicate-value-in-token-list": _(u"Duplicate value in token list: '%(attributeValue)s' in '%(attributeName)s' attribute on <%(tagName)s>."), "invalid-attribute-value": _(u"Invalid attribute value: '%(attributeName)s' attribute on <%(tagName)s>."), "space-in-id": _(u"Whitespace is not allowed here: '%(attributeName)s' attribute on <%(tagName)s>."), "duplicate-id": _(u"This ID was already defined earlier: 'id' attribute on <%(tagName)s>."), "attribute-value-can-not-be-blank": _(u"This value can not be blank: '%(attributeName)s' attribute on <%(tagName)s>."), "id-does-not-exist": _(u"This value refers to a non-existent ID: '%(attributeName)s' attribute on <%(tagName)s>."), "invalid-enumerated-value": _(u"Value must be one of %(enumeratedValues)s: '%(attributeName)s' attribute on <%tagName)s>."), "invalid-boolean-value": _(u"Value must be one of %(enumeratedValues)s: '%(attributeName)s' attribute on <%tagName)s>."), "contextmenu-must-point-to-menu": _(u"The contextmenu attribute must point to an ID defined on a element."), "invalid-lang-code": _(u"Invalid language code: '%(attributeName)s' attibute on <%(tagName)s>."), "invalid-integer-value": _(u"Value must be an integer: '%(attributeName)s' attribute on <%tagName)s>."), "invalid-root-namespace": _(u"Root namespace must be 'http://www.w3.org/1999/xhtml', or omitted."), "invalid-browsing-context": _(u"Value must be one of ('_self', '_parent', '_top'), or a name that does not start with '_': '%(attributeName)s' attribute on <%(tagName)s>."), "invalid-tag-uri": _(u"Invalid URI: '%(attributeName)s' attribute on <%(tagName)s>."), "invalid-urn": _(u"Invalid URN: '%(attributeName)s' attribute on <%(tagName)s>."), "invalid-uri-char": _(u"Illegal character in URI: '%(attributeName)s' attribute on <%(tagName)s>."), "uri-not-iri": _(u"Expected a URI but found an IRI: '%(attributeName)s' attribute on <%(tagName)s>."), "invalid-uri": _(u"Invalid URI: '%(attributeName)s' attribute on <%(tagName)s>."), "invalid-http-or-ftp-uri": _(u"Invalid URI: '%(attributeName)s' attribute on <%(tagName)s>."), "invalid-scheme": _(u"Unregistered URI scheme: '%(attributeName)s' attribute on <%(tagName)s>."), "invalid-rel": _(u"Invalid link relation: '%(attributeName)s' attribute on <%(tagName)s>."), "invalid-mime-type": _(u"Invalid MIME type: '%(attributeName)s' attribute on <%(tagName)s>."), }) globalAttributes = frozenset(('class', 'contenteditable', 'contextmenu', 'dir', 'draggable', 'id', 'irrelevant', 'lang', 'ref', 'tabindex', 'template', 'title', 'onabort', 'onbeforeunload', 'onblur', 'onchange', 'onclick', 'oncontextmenu', 'ondblclick', 'ondrag', 'ondragend', 'ondragenter', 'ondragleave', 'ondragover', 'ondragstart', 'ondrop', 'onerror', 'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload', 'onmessage', 'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 'onmouseup', 'onmousewheel', 'onresize', 'onscroll', 'onselect', 'onsubmit', 'onunload')) # XXX lang in HTML only, xml:lang in XHTML only # XXX validate ref, template allowedAttributeMap = { 'html': frozenset(('xmlns',)), 'head': frozenset(()), 'title': frozenset(()), 'base': frozenset(('href', 'target')), 'link': frozenset(('href', 'rel', 'media', 'hreflang', 'type')), 'meta': frozenset(('name', 'http-equiv', 'content', 'charset')), # XXX charset in HTML only 'style': frozenset(('media', 'type', 'scoped')), 'body': frozenset(()), 'section': frozenset(()), 'nav': frozenset(()), 'article': frozenset(()), 'blockquote': frozenset(('cite',)), 'aside': frozenset(()), 'h1': frozenset(()), 'h2': frozenset(()), 'h3': frozenset(()), 'h4': frozenset(()), 'h5': frozenset(()), 'h6': frozenset(()), 'header': frozenset(()), 'footer': frozenset(()), 'address': frozenset(()), 'p': frozenset(()), 'hr': frozenset(()), 'br': frozenset(()), 'dialog': frozenset(()), 'pre': frozenset(()), 'ol': frozenset(('start',)), 'ul': frozenset(()), 'li': frozenset(('value',)), # XXX depends on parent 'dl': frozenset(()), 'dt': frozenset(()), 'dd': frozenset(()), 'a': frozenset(('href', 'target', 'ping', 'rel', 'media', 'hreflang', 'type')), 'q': frozenset(('cite',)), 'cite': frozenset(()), 'em': frozenset(()), 'strong': frozenset(()), 'small': frozenset(()), 'm': frozenset(()), 'dfn': frozenset(()), 'abbr': frozenset(()), 'time': frozenset(('datetime',)), 'meter': frozenset(('value', 'min', 'low', 'high', 'max', 'optimum')), 'progress': frozenset(('value', 'max')), 'code': frozenset(()), 'var': frozenset(()), 'samp': frozenset(()), 'kbd': frozenset(()), 'sup': frozenset(()), 'sub': frozenset(()), 'span': frozenset(()), 'i': frozenset(()), 'b': frozenset(()), 'bdo': frozenset(()), 'ins': frozenset(('cite', 'datetime')), 'del': frozenset(('cite', 'datetime')), 'figure': frozenset(()), 'img': frozenset(('alt', 'src', 'usemap', 'ismap', 'height', 'width')), # XXX ismap depends on parent 'iframe': frozenset(('src',)), # handled separately 'object': frozenset(('data', 'type', 'usemap', 'height', 'width')), 'param': frozenset(('name', 'value')), 'video': frozenset(('src', 'autoplay', 'start', 'loopstart', 'loopend', 'end', 'loopcount', 'controls')), 'audio': frozenset(('src', 'autoplay', 'start', 'loopstart', 'loopend', 'end', 'loopcount', 'controls')), 'source': frozenset(('src', 'type', 'media')), 'canvas': frozenset(('height', 'width')), 'map': frozenset(()), 'area': frozenset(('alt', 'coords', 'shape', 'href', 'target', 'ping', 'rel', 'media', 'hreflang', 'type')), 'table': frozenset(()), 'caption': frozenset(()), 'colgroup': frozenset(('span',)), # XXX only if element contains no elements 'col': frozenset(('span',)), 'tbody': frozenset(()), 'thead': frozenset(()), 'tfoot': frozenset(()), 'tr': frozenset(()), 'td': frozenset(('colspan', 'rowspan')), 'th': frozenset(('colspan', 'rowspan', 'scope')), # all possible attributes are listed here but is really handled separately 'input': frozenset(('accept', 'accesskey', 'action', 'alt', 'autocomplete', 'autofocus', 'checked', 'disabled', 'enctype', 'form', 'inputmode', 'list', 'maxlength', 'method', 'min', 'max', 'name', 'pattern', 'step', 'readonly', 'replace', 'required', 'size', 'src', 'tabindex', 'target', 'template', 'value')), 'form': frozenset(('action', 'method', 'enctype', 'accept', 'name', 'onsubmit', 'onreset', 'accept-charset', 'data', 'replace')), 'button': frozenset(('action', 'enctype', 'method', 'replace', 'template', 'name', 'value', 'type', 'disabled', 'form', 'autofocus')), # XXX may need matrix of acceptable attributes based on value of type attribute (like input) 'select': frozenset(('name', 'size', 'multiple', 'disabled', 'data', 'accesskey', 'form', 'autofocus')), 'optgroup': frozenset(('disabled', 'label')), 'option': frozenset(('selected', 'disabled', 'label', 'value')), 'textarea': frozenset(('maxlength', 'name', 'rows', 'cols', 'disabled', 'readonly', 'required', 'form', 'autofocus', 'wrap', 'accept')), 'label': frozenset(('for', 'accesskey', 'form')), 'fieldset': frozenset(('disabled', 'form')), 'output': frozenset(('form', 'name', 'for', 'onforminput', 'onformchange')), 'datalist': frozenset(('data',)), # # XXX repetition model for repeating form controls 'script': frozenset(('src', 'defer', 'async', 'type')), 'noscript': frozenset(()), 'noembed': frozenset(()), 'event-source': frozenset(('src',)), 'details': frozenset(('open',)), 'datagrid': frozenset(('multiple', 'disabled')), 'command': frozenset(('type', 'label', 'icon', 'hidden', 'disabled', 'checked', 'radiogroup', 'default')), 'menu': frozenset(('type', 'label', 'autosubmit')), 'datatemplate': frozenset(()), 'rule': frozenset(()), 'nest': frozenset(()), 'legend': frozenset(()), 'div': frozenset(()), 'font': frozenset(('style',)) } requiredAttributeMap = { 'link': frozenset(('href', 'rel')), 'bdo': frozenset(('dir',)), 'img': frozenset(('src',)), 'embed': frozenset(('src',)), 'object': frozenset(()), # XXX one of 'data' or 'type' is required 'param': frozenset(('name', 'value')), 'source': frozenset(('src',)), 'map': frozenset(('id',)) } inputTypeAllowedAttributeMap = { 'text': frozenset(('accesskey', 'autocomplete', 'autofocus', 'disabled', 'form', 'inputmode', 'list', 'maxlength', 'name', 'pattern', 'readonly', 'required', 'size', 'tabindex', 'value')), 'password': frozenset(('accesskey', 'autocomplete', 'autofocus', 'disabled', 'form', 'inputmode', 'maxlength', 'name', 'pattern', 'readonly', 'required', 'size', 'tabindex', 'value')), 'checkbox': frozenset(('accesskey', 'autofocus', 'checked', 'disabled', 'form', 'name', 'required', 'tabindex', 'value')), 'radio': frozenset(('accesskey', 'autofocus', 'checked', 'disabled', 'form', 'name', 'required', 'tabindex', 'value')), 'button': frozenset(('accesskey', 'autofocus', 'disabled', 'form', 'name', 'tabindex', 'value')), 'submit': frozenset(('accesskey', 'action', 'autofocus', 'disabled', 'enctype', 'form', 'method', 'name', 'replace', 'tabindex', 'target', 'value')), 'reset': frozenset(('accesskey', 'autofocus', 'disabled', 'form', 'name', 'tabindex', 'value')), 'add': frozenset(('accesskey', 'autofocus', 'disabled', 'form', 'name', 'tabindex', 'template', 'value')), 'remove': frozenset(('accesskey', 'autofocus', 'disabled', 'form', 'name', 'tabindex', 'value')), 'move-up': frozenset(('accesskey', 'autofocus', 'disabled', 'form', 'name', 'tabindex', 'value')), 'move-down': frozenset(('accesskey', 'autofocus', 'disabled', 'form', 'name', 'tabindex', 'value')), 'file': frozenset(('accept', 'accesskey', 'autofocus', 'disabled', 'form', 'min', 'max', 'name', 'required', 'tabindex')), 'hidden': frozenset(('disabled', 'form', 'name', 'value')), 'image': frozenset(('accesskey', 'action', 'alt', 'autofocus', 'disabled', 'enctype', 'form', 'method', 'name', 'replace', 'src', 'tabindex', 'target')), 'datetime': frozenset(('accesskey', 'autocomplete', 'autofocus', 'disabled', 'form', 'list', 'min', 'max', 'name', 'step', 'readonly', 'required', 'tabindex', 'value')), 'datetime-local': frozenset(('accesskey', 'autocomplete', 'autofocus', 'disabled', 'form', 'list', 'min', 'max', 'name', 'step', 'readonly', 'required', 'tabindex', 'value')), 'date': frozenset(('accesskey', 'autocomplete', 'autofocus', 'disabled', 'form', 'list', 'min', 'max', 'name', 'step', 'readonly', 'required', 'tabindex', 'value')), 'month': frozenset(('accesskey', 'autocomplete', 'autofocus', 'disabled', 'form', 'list', 'min', 'max', 'name', 'step', 'readonly', 'required', 'tabindex', 'value')), 'week': frozenset(('accesskey', 'autocomplete', 'autofocus', 'disabled', 'form', 'list', 'min', 'max', 'name', 'step', 'readonly', 'required', 'tabindex', 'value')), 'time': frozenset(('accesskey', 'autocomplete', 'autofocus', 'disabled', 'form', 'list', 'min', 'max', 'name', 'step', 'readonly', 'required', 'tabindex', 'value')), 'number': frozenset(('accesskey', 'autocomplete', 'autofocus', 'disabled', 'form', 'list', 'min', 'max', 'name', 'step', 'readonly', 'required', 'tabindex', 'value')), 'range': frozenset(('accesskey', 'autocomplete', 'autofocus', 'disabled', 'form', 'list', 'min', 'max', 'name', 'step', 'readonly', 'required', 'tabindex', 'value')), 'email': frozenset(('accesskey', 'autocomplete', 'autofocus', 'disabled', 'form', 'inputmode', 'list', 'maxlength', 'name', 'pattern', 'readonly', 'required', 'tabindex', 'value')), 'url': frozenset(('accesskey', 'autocomplete', 'autofocus', 'disabled', 'form', 'inputmode', 'list', 'maxlength', 'name', 'pattern', 'readonly', 'required', 'tabindex', 'value')) } inputTypeDeprecatedAttributeMap = { 'text': frozenset(('size',)), 'password': frozenset(('size',)) } linkRelValues = frozenset(('alternate', 'archive', 'archives', 'author', 'contact', 'feed', 'first', 'begin', 'start', 'help', 'icon', 'index', 'top', 'contents', 'toc', 'last', 'end', 'license', 'copyright', 'next', 'pingback', 'prefetch', 'prev', 'previous', 'search', 'stylesheet', 'sidebar', 'tag', 'up')) aRelValues = frozenset(('alternate', 'archive', 'archives', 'author', 'contact', 'feed', 'first', 'begin', 'start', 'help', 'index', 'top', 'contents', 'toc', 'last', 'end', 'license', 'copyright', 'next', 'prev', 'previous', 'search', 'sidebar', 'tag', 'up', 'bookmark', 'external', 'nofollow')) class HTMLConformanceChecker(_base.Filter): def __init__(self, stream, encoding, parseMeta, **kwargs): _base.Filter.__init__(self, tokenizer.HTMLTokenizer( stream, encoding, parseMeta, **kwargs)) self.thingsThatDefineAnID = [] self.thingsThatPointToAnID = [] self.IDsWeHaveKnownAndLoved = [] def __iter__(self): types = dict((v,k) for k,v in tokenTypes.iteritems()) for token in _base.Filter.__iter__(self): fakeToken = {"type": types.get(token.get("type", "-"), "-"), "name": token.get("name", "-").capitalize()} method = getattr(self, "validate%(type)s%(name)s" % fakeToken, None) if method: for t in method(token) or []: yield t else: method = getattr(self, "validate%(type)s" % fakeToken, None) if method: for t in method(token) or []: yield t yield token for t in self.eof() or []: yield t ########################################################################## # Start tag validation ########################################################################## def validateStartTag(self, token): for t in self.checkUnknownStartTag(token) or []: yield t for t in self.checkStartTagRequiredAttributes(token) or []: yield t for t in self.checkStartTagUnknownAttributes(token) or []: yield t for t in self.checkAttributeValues(token) or []: yield t def validateStartTagEmbed(self, token): for t in self.checkStartTagRequiredAttributes(token) or []: yield t for t in self.checkAttributeValues(token) or []: yield t # spec says "any attributes w/o namespace" # so don't call checkStartTagUnknownAttributes def validateStartTagInput(self, token): for t in self.checkAttributeValues(token) or []: yield t attrDict = dict([(name.lower(), value) for name, value in token.get("data", [])]) inputType = attrDict.get("type", "text") if inputType not in inputTypeAllowedAttributeMap.keys(): yield {"type": tokenTypes["ParseError"], "data": "unknown-input-type", "datavars": {"attrValue": inputType}} allowedAttributes = inputTypeAllowedAttributeMap.get(inputType, []) for attrName, attrValue in attrDict.items(): if attrName not in allowedAttributeMap['input']: yield {"type": tokenTypes["ParseError"], "data": "unknown-attribute", "datavars": {"tagName": "input", "attributeName": attrName}} elif attrName not in allowedAttributes: yield {"type": tokenTypes["ParseError"], "data": "attribute-not-allowed-on-this-input-type", "datavars": {"attributeName": attrName, "inputType": inputType}} if attrName in inputTypeDeprecatedAttributeMap.get(inputType, []): yield {"type": tokenTypes["ParseError"], "data": "deprecated-attribute", "datavars": {"attributeName": attrName, "inputType": inputType}} ########################################################################## # Start tag validation helpers ########################################################################## def checkUnknownStartTag(self, token): # check for recognized tag name name = token.get("name", "").lower() if name not in allowedAttributeMap.keys(): yield {"type": tokenTypes["ParseError"], "data": "unknown-start-tag", "datavars": {"tagName": name}} def checkStartTagRequiredAttributes(self, token): # check for presence of required attributes name = token.get("name", "").lower() if name in requiredAttributeMap.keys(): attrsPresent = [attrName for attrName, attrValue in token.get("data", [])] for attrName in requiredAttributeMap[name]: if attrName not in attrsPresent: yield {"type": tokenTypes["ParseError"], "data": "missing-required-attribute", "datavars": {"tagName": name, "attributeName": attrName}} def checkStartTagUnknownAttributes(self, token): # check for recognized attribute names name = token.get("name").lower() allowedAttributes = globalAttributes | allowedAttributeMap.get(name, frozenset(())) for attrName, attrValue in token.get("data", []): if attrName.lower() not in allowedAttributes: yield {"type": tokenTypes["ParseError"], "data": "unknown-attribute", "datavars": {"tagName": name, "attributeName": attrName}} ########################################################################## # Attribute validation helpers ########################################################################## # def checkURI(self, token, tagName, attrName, attrValue): # isValid, errorCode = rfc3987.isValidURI(attrValue) # if not isValid: # yield {"type": tokenTypes["ParseError"], # "data": errorCode, # "datavars": {"tagName": tagName, # "attributeName": attrName}} # yield {"type": tokenTypes["ParseError"], # "data": "invalid-attribute-value", # "datavars": {"tagName": tagName, # "attributeName": attrName}} def checkIRI(self, token, tagName, attrName, attrValue): isValid, errorCode = rfc3987.isValidIRI(attrValue) if not isValid: yield {"type": tokenTypes["ParseError"], "data": errorCode, "datavars": {"tagName": tagName, "attributeName": attrName}} yield {"type": tokenTypes["ParseError"], "data": "invalid-attribute-value", "datavars": {"tagName": tagName, "attributeName": attrName}} def checkID(self, token, tagName, attrName, attrValue): if not attrValue: yield {"type": tokenTypes["ParseError"], "data": "attribute-value-can-not-be-blank", "datavars": {"tagName": tagName, "attributeName": attrName}} for c in attrValue: if c in spaceCharacters: yield {"type": tokenTypes["ParseError"], "data": "space-in-id", "datavars": {"tagName": tagName, "attributeName": attrName}} yield {"type": tokenTypes["ParseError"], "data": "invalid-attribute-value", "datavars": {"tagName": tagName, "attributeName": attrName}} break def parseTokenList(self, value): valueList = [] currentValue = '' for c in value + ' ': if c in spaceCharacters: if currentValue: valueList.append(currentValue) currentValue = '' else: currentValue += c if currentValue: valueList.append(currentValue) return valueList def checkTokenList(self, tagName, attrName, attrValue): # The "token" in the method name refers to tokens in an attribute value # i.e. http://www.whatwg.org/specs/web-apps/current-work/#set-of # but the "token" parameter refers to the token generated from # HTMLTokenizer. Sorry for the confusion. valueList = self.parseTokenList(attrValue) valueDict = {} for currentValue in valueList: if valueDict.has_key(currentValue): yield {"type": tokenTypes["ParseError"], "data": "duplicate-value-in-token-list", "datavars": {"tagName": tagName, "attributeName": attrName, "attributeValue": currentValue}} break valueDict[currentValue] = 1 def checkEnumeratedValue(self, token, tagName, attrName, attrValue, enumeratedValues): if not attrValue and ('' not in enumeratedValues): yield {"type": tokenTypes["ParseError"], "data": "attribute-value-can-not-be-blank", "datavars": {"tagName": tagName, "attributeName": attrName}} return attrValue = attrValue.lower() if attrValue not in enumeratedValues: yield {"type": tokenTypes["ParseError"], "data": "invalid-enumerated-value", "datavars": {"tagName": tagName, "attributeName": attrName, "enumeratedValues": tuple(enumeratedValues)}} yield {"type": tokenTypes["ParseError"], "data": "invalid-attribute-value", "datavars": {"tagName": tagName, "attributeName": attrName}} def checkBoolean(self, token, tagName, attrName, attrValue): enumeratedValues = frozenset((attrName, '')) if attrValue not in enumeratedValues: yield {"type": tokenTypes["ParseError"], "data": "invalid-boolean-value", "datavars": {"tagName": tagName, "attributeName": attrName, "enumeratedValues": tuple(enumeratedValues)}} yield {"type": tokenTypes["ParseError"], "data": "invalid-attribute-value", "datavars": {"tagName": tagName, "attributeName": attrName}} def checkInteger(self, token, tagName, attrName, attrValue): sign = 1 numberString = '' state = 'begin' # ('begin', 'initial-number', 'number', 'trailing-junk') error = {"type": tokenTypes["ParseError"], "data": "invalid-integer-value", "datavars": {"tagName": tagName, "attributeName": attrName, "attributeValue": attrValue}} for c in attrValue: if state == 'begin': if c in spaceCharacters: pass elif c == '-': sign = -1 state = 'initial-number' elif c in digits: numberString += c state = 'in-number' else: yield error return elif state == 'initial-number': if c not in digits: yield error return numberString += c state = 'in-number' elif state == 'in-number': if c in digits: numberString += c else: state = 'trailing-junk' elif state == 'trailing-junk': pass if not numberString: yield {"type": tokenTypes["ParseError"], "data": "attribute-value-can-not-be-blank", "datavars": {"tagName": tagName, "attributeName": attrName}} def checkFloatingPointNumber(self, token, tagName, attrName, attrValue): # XXX pass def checkBrowsingContext(self, token, tagName, attrName, attrValue): if not attrValue: return if attrValue[0] != '_': return attrValue = attrValue.lower() if attrValue in frozenset(('_self', '_parent', '_top', '_blank')): return yield {"type": tokenTypes["ParseError"], "data": "invalid-browsing-context", "datavars": {"tagName": tagName, "attributeName": attrName}} def checkLangCode(self, token, tagName, attrName, attrValue): if not attrValue: return # blank is OK if not iso639codes.isValidLangCode(attrValue): yield {"type": tokenTypes["ParseError"], "data": "invalid-lang-code", "datavars": {"tagName": tagName, "attributeName": attrName, "attributeValue": attrValue}} def checkMIMEType(self, token, tagName, attrName, attrValue): # XXX needs tests if not attrValue: yield {"type": tokenTypes["ParseError"], "data": "attribute-value-can-not-be-blank", "datavars": {"tagName": tagName, "attributeName": attrName}} if not rfc2046.isValidMIMEType(attrValue): yield {"type": tokenTypes["ParseError"], "data": "invalid-mime-type", "datavars": {"tagName": tagName, "attributeName": attrName, "attributeValue": attrValue}} def checkMediaQuery(self, token, tagName, attrName, attrValue): # XXX pass def checkLinkRelation(self, token, tagName, attrName, attrValue): for t in self.checkTokenList(tagName, attrName, attrValue) or []: yield t valueList = self.parseTokenList(attrValue) allowedValues = (tagName == 'link') and linkRelValues or aRelValues for currentValue in valueList: if currentValue not in allowedValues: yield {"type": tokenTypes["ParseError"], "data": "invalid-rel", "datavars": {"tagName": tagName, "attributeName": attrName}} def checkDateTime(self, token, tagName, attrName, attrValue): # XXX state = 'begin' # ('begin', '... # for c in attrValue: # if state == 'begin': # if c in spaceCharacters: # continue # elif c in digits: # state = ... ########################################################################## # Attribute validation ########################################################################## def checkAttributeValues(self, token): tagName = token.get("name", "") fakeToken = {"tagName": tagName.capitalize()} for attrName, attrValue in token.get("data", []): attrName = attrName.lower() fakeToken["attributeName"] = attrName.capitalize() method = getattr(self, "validateAttributeValue%(tagName)s%(attributeName)s" % fakeToken, None) if method: for t in method(token, tagName, attrName, attrValue) or []: yield t else: method = getattr(self, "validateAttributeValue%(attributeName)s" % fakeToken, None) if method: for t in method(token, tagName, attrName, attrValue) or []: yield t def validateAttributeValueClass(self, token, tagName, attrName, attrValue): for t in self.checkTokenList(tagName, attrName, attrValue) or []: yield t yield {"type": tokenTypes["ParseError"], "data": "invalid-attribute-value", "datavars": {"tagName": tagName, "attributeName": attrName}} def validateAttributeValueContenteditable(self, token, tagName, attrName, attrValue): for t in self.checkEnumeratedValue(token, tagName, attrName, attrValue, frozenset(('true', 'false', ''))) or []: yield t def validateAttributeValueDir(self, token, tagName, attrName, attrValue): for t in self.checkEnumeratedValue(token, tagName, attrName, attrValue, frozenset(('ltr', 'rtl'))) or []: yield t def validateAttributeValueDraggable(self, token, tagName, attrName, attrValue): for t in self.checkEnumeratedValue(token, tagName, attrName, attrValue, frozenset(('true', 'false'))) or []: yield t validateAttributeValueIrrelevant = checkBoolean validateAttributeValueLang = checkLangCode def validateAttributeValueContextmenu(self, token, tagName, attrName, attrValue): for t in self.checkID(token, tagName, attrName, attrValue) or []: yield t self.thingsThatPointToAnID.append(token) def validateAttributeValueId(self, token, tagName, attrName, attrValue): # This method has side effects. It adds 'token' to the list of # things that define an ID (self.thingsThatDefineAnID) so that we can # later check 1) whether an ID is duplicated, and 2) whether all the # things that point to something else by ID (like