summaryrefslogtreecommitdiffstats
path: root/lib/jython/Lib/xml/parsers/xmlproc/xmlval.py
blob: 250684c1602a389bba8fc50e9781e1878ec95d07 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
"""This is the parts of xmlproc that are specific to validation. They
are an application class that receive data from the parser and a
subclass of the parser object that sets this up.

$Id: xmlval.py,v 1.11 2001/03/03 07:32:35 loewis Exp $
"""

import urlparse,os,anydbm,string,cPickle,time

from xmlproc import *
from xmldtd import *
from xmlapp import *

# ==============================
# The validator class
# ==============================

class XMLValidator:
    """XML parser that validates a document and does some of what is required
    of a validating parser, like adding fixed and default attribute values
    etc."""
    
    def __init__(self):
	self.parser=XMLProcessor()
        self.app=Application()
        self.dtd=CompleteDTD(self.parser)
        self.val=ValidatingApp(self.dtd,self.parser)
        self.reset()

    def parse_resource(self,sysid):
        self.parser.parse_resource(sysid)

    def reset(self):
        self.dtd.reset()
        self.val.reset()
        
        self.parser.reset()
	self.parser.set_application(self.val)
        self.parser.dtd=self.dtd
	self.parser.ent=self.dtd
        self.parser.set_read_external_subset(1)
        
    def feed(self,data):
        self.parser.feed(data)

    def close(self):
        self.parser.close()

    def deref(self):
        self.parser.deref()
        
    def set_application(self,app):
        self.app=app
	self.val.set_real_app(self.app)
	app.set_locator(self.parser)
	
    def set_error_language(self,language):
        self.parser.set_error_language(language)
        
    def set_error_handler(self,err):
	self.parser.set_error_handler(err)

    def set_dtd_listener(self,dtd_listener):
	self.parser.set_dtd_listener(dtd_listener)

    def set_inputsource_factory(self,isf):
        self.parser.set_inputsource_factory(isf)

    def set_pubid_resolver(self,pubres):
        self.val.set_pubid_resolver(pubres)
        self.parser.set_pubid_resolver(pubres)

    def set_data_after_wf_error(self,stop_on_wf=0):
        self.parser.set_data_after_wf_error(stop_on_wf)

    def set_sysid(self, sysid):
        self.parser.set_sysid(sysid)

    def set_read_external_subset(self,read_it):
        pass # This parser always reads it
        
    def get_dtd(self):
        return self.dtd

    def get_current_sysid(self):
	return self.parser.get_current_sysid()

    def get_offset(self):
	return self.parser.get_offset()
	
    def get_line(self):
	return self.parser.get_line()

    def get_column(self):
	return self.parser.get_column()

    def parseStart(self):
        self.parser.parseStart()

    def parseEnd(self):
        self.parser.parseEnd()

    def read_from(self,file,bufsize=16384):
        self.parser.read_from(file,bufsize)

    def flush(self):
        self.parser.flush()

    def report_error(self,errno,args=None):
        self.parser.report_error(errno,args)

    # ===== The introspection methods =====
        
    def get_elem_stack(self):
        "Returns the internal element stack. Note: this is a live list!"
        return self.parser.stack

    def get_data_buffer(self):
        "Returns the current data buffer."
        return self.parser.data

    def get_construct_start(self):
        """Returns the start position of the current construct (tag, comment,
        etc)."""
        return self.parser.prepos

    def get_construct_end(self):
        """Returns the end position of the current construct (tag, comment,
        etc)."""
        return self.parser.pos

    def get_raw_construct(self):
        "Returns the raw form of the current construct."
        return self.parser.data[self.parser.prepos:parser.self.pos]

    def get_current_ent_stack(self):
        """Returns a snapshot of the entity stack. A list of the system
        identifier of the entity and its name, if any."""
        return map(lambda ent: (ent[0],ent[9]),self.parser.ent_stack)
        
# ==============================
# Application object that checks the document
# ==============================

class ValidatingApp(Application):
    "The object that uses the DTD to actually validate XML documents."

    def __init__(self,dtd,parser):
	self.dtd=dtd
        self.parser=parser
	self.realapp=Application()
        self.pubres=PubIdResolver()
        self.reset()

    def reset(self):
	self.cur_elem=None
	self.cur_state=0
	self.stack=[]
	self.ids={}
	self.idrefs=[]        
        
    def set_real_app(self,app):
	self.realapp=app

    def set_pubid_resolver(self,pubres):
        self.pubres=pubres
                
    def set_locator(self,locator):
	Application.set_locator(self,locator)
	self.realapp.set_locator(locator)

    def handle_start_tag(self,name,attrs):
	decl_root=self.dtd.get_root_elem()
	
	if self.cur_elem!=None:
            if self.cur_state!=-1:
                next=self.cur_elem.next_state(self.cur_state,name)
                if next==0:
                    self.parser.report_error(2001,name)
                else:
                    self.cur_state=next

	    self.stack.append((self.cur_elem,self.cur_state))
	elif decl_root!=None and name!=decl_root:
	    self.parser.report_error(2002,name)

	try:
	    self.cur_elem=self.dtd.get_elem(name)
            self.cur_state=self.cur_elem.get_start_state()
	    self.validate_attributes(self.dtd.get_elem(name),attrs)
	except KeyError,e:
	    self.parser.report_error(2003,name)
	    self.cur_state=-1

	self.realapp.handle_start_tag(name,attrs)
	
    def handle_end_tag(self,name):
	"Notifies the application of end tags (and empty element tags)."
	if self.cur_elem!=None and \
	   not self.cur_elem.final_state(self.cur_state):
	    self.parser.report_error(2004,name)
	
	if self.stack!=[]:
	    (self.cur_elem,self.cur_state)=self.stack[-1]
	    del self.stack[-1]

	self.realapp.handle_end_tag(name)
    
    def handle_data(self,data,start,end):
	"Notifies the application of character data."
	if self.cur_elem!=None and self.cur_state!=-1:
	    next=self.cur_elem.next_state(self.cur_state,"#PCDATA")

	    if next==0:
                self.realapp.handle_ignorable_data(data,start,end)
                for ch in data[start:end]:
                    if not ch in " \t\r\n":
                        self.parser.report_error(2005)
                        break

                return		    
	    else:
		self.cur_state=next

	self.realapp.handle_data(data,start,end)

    def validate_attributes(self,element,attrs):
	"""Validates the attributes against the element declaration and adds
	fixed and default attributes."""

	# Check the values of the present attributes
	for attr in attrs.keys():
	    try:
		decl=element.get_attr(attr)
	    except KeyError,e:
		self.parser.report_error(2006,attr)
                return
        
            if decl.type!="CDATA":
                attrs[attr]=string.join(string.split(attrs[attr]))
            
            decl.validate(attrs[attr],self.parser)
                
	    if decl.type=="ID":
		if self.ids.has_key(attrs[attr]):
		    self.parser.report_error(2007,attrs[attr])
		self.ids[attrs[attr]]=""
	    elif decl.type=="IDREF":
		self.idrefs.append((self.locator.get_line(),
				    self.locator.get_column(),
				    attrs[attr]))
	    elif decl.type=="IDREFS":
		for idref in string.split(attrs[attr]):
		    self.idrefs.append((self.locator.get_line(),
					self.locator.get_column(),
					idref))
	    elif decl.type=="ENTITY":
                self.__validate_attr_entref(attrs[attr])
	    elif decl.type=="ENTITIES":
		for ent_ref in string.split(attrs[attr]):
                    self.__validate_attr_entref(ent_ref)

        # Check for missing required attributes
	for attr in element.get_attr_list():
	    decl=element.get_attr(attr)
	    if decl.decl=="#REQUIRED" and not attrs.has_key(attr):
		self.parser.report_error(2010,attr)

    def __validate_attr_entref(self,name):
        try:
            ent=self.dtd.resolve_ge(name)
            if ent.notation=="":
                self.parser.report_error(2008)
            else:
                try:
                    self.dtd.get_notation(ent.notation)
                except KeyError,e:
                    self.parser.report_error(2009,ent.notation)
        except KeyError,e:
            self.parser.report_error(3021,name)        
                
    def doc_end(self):
	for (line,col,id) in self.idrefs:
	    if not self.ids.has_key(id):
		self.parser.report_error(2011,id)

	self.realapp.doc_end()

    def handle_doctype(self,rootname,pub_id,sys_id):
 	self.realapp.handle_doctype(rootname,pub_id,sys_id)
	self.dtd.root_elem=rootname

    # --- These methods added only to make this hanger-on application
    #     invisible to external users.
		
    def doc_start(self):
	self.realapp.doc_start()
	
    def handle_comment(self,data):
	self.realapp.handle_comment(data)

    def handle_ignorable_data(self,data,start,end):
	self.realapp.handle_ignorable_data(data,start,end)

    def handle_pi(self,target,data):
	self.realapp.handle_pi(target,data)

    def set_entity_info(self,xmlver,enc,sddecl):
	self.realapp.set_entity_info(xmlver,enc,sddecl)