aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/antlr3_cpp_runtime/include/antlr3input.inl
blob: 6837a06540aaa4d7e1174a4ccc2fcacb58ba4c2c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
namespace antlr3 {

// Builds an input stream over the contents of a file.
//
// fileName  Path of the file to load; also becomes the stream name and the
//           file name reported by get_fileName(). A NULL pointer makes
//           createFileStream() throw ParseFileAbsentException.
// encoding  Encoding identifier that setupInputStream() switches on.
template<class ImplTraits>
InputStream<ImplTraits>::InputStream(const ANTLR_UINT8* fileName, ANTLR_UINT32 encoding)
{
    // Step 1: load the file as plain 8 bit bytes and apply the default
    // 8 bit stream settings.
    this->createFileStream(fileName);

    // Step 2: remember the encoding the caller asked for; the data is in
    // memory now, so it can be interpreted accordingly.
    this->m_encoding = encoding;

    // Step 3: detect byte order / BOM and adjust whatever differs from the
    // 8 bit defaults.
    this->setupInputStream();

    // Step 4: record the name of the stream and mirror it as the file name.
    const char* const streamLabel = reinterpret_cast<const char*>(fileName);
    BaseType::m_streamName = streamLabel;
    this->m_fileName = BaseType::m_streamName;
}

// Builds an input stream over a caller supplied, in-memory buffer.
//
// data      Start of the buffer; it is installed as-is and not marked as
//           owned by the stream. A NULL pointer makes createStringStream()
//           throw ParseNullStringException.
// encoding  Encoding identifier that setupInputStream() switches on.
// size      Size (in bytes) of the buffer.
// name      Optional stream name; NULL yields an empty name.
template<class ImplTraits>
InputStream<ImplTraits>::InputStream(const ANTLR_UINT8* data, ANTLR_UINT32 encoding, ANTLR_UINT32 size, ANTLR_UINT8* name)
{
    // Install the data pointer and apply the default 8 bit stream settings.
    this->createStringStream(data);

    // Record how many bytes the caller handed us.
    this->m_sizeBuf = size;

    // Remember the requested encoding, then let setupInputStream() detect the
    // byte order / BOM and adjust whatever differs from the 8 bit defaults.
    this->m_encoding = encoding;
    this->setupInputStream();

    // Finally name the stream; a missing name becomes the empty string.
    const char* const streamLabel = (name != NULL) ? reinterpret_cast<const char*>(name) : "";
    BaseType::m_streamName = streamLabel;
    this->m_fileName = BaseType::m_streamName;
}

// Points the stream at a caller owned buffer and applies the generic 8 bit
// stream defaults.
//
// Throws ParseNullStringException when data is NULL.
template<class ImplTraits>
void InputStream<ImplTraits>::createStringStream(const ANTLR_UINT8* data)
{
	// A stream cannot be built over a missing buffer.
	if	(data == NULL)
	{
		throw ParseNullStringException();
	}

	// The buffer belongs to the caller, so flag it as not allocated by us;
	// the destructor will then leave it alone.
	this->m_isAllocated = false;
	this->m_data        = data;

	// Common 8 bit input stream initialization.
	this->genericSetupStream();
}

// Loads the named file into the stream via FileUtils::AntlrRead8Bit and
// applies the generic 8 bit stream defaults.
//
// Throws ParseFileAbsentException when fileName is NULL.
template<class ImplTraits>
void InputStream<ImplTraits>::createFileStream(const ANTLR_UINT8* fileName)
{
	// Without a file name there is nothing to read.
	if	(fileName == NULL)
	{
		throw ParseFileAbsentException();
	}

	// Read the file as raw 8 bit data into this stream.
	FileUtils<ImplTraits>::AntlrRead8Bit(this, fileName);

	// Common 8 bit input stream initialization.
	this->genericSetupStream();
}

// Applies the defaults shared by every freshly created stream: one byte per
// character, read position rewound to the start of the data, and '\n' as the
// line separator.
template<class ImplTraits>
void InputStream<ImplTraits>::genericSetupStream()
{
	// Default to single byte characters.
	this->set_charByteSize(1);

	// Start from a clean, rewound state.
	this->reset();

	// Default line separator; the grammar programmer may replace it later.
	this->set_newLineChar(static_cast<ANTLR_UCHAR>('\n'));
}

// Releases the input buffer, but only when this stream allocated it.
template<class ImplTraits>
InputStream<ImplTraits>::~InputStream()
{
	// Caller owned or missing buffers are left untouched.
	if	(!m_isAllocated || (m_data == NULL))
		return;

	// The allocator takes a mutable pointer, so constness is cast away here.
	AllocPolicyType::free((void*)m_data);
}

// Returns the pointer to the start of the input buffer.
template<class ImplTraits>
ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_data() const
{
	return this->m_data;
}
// Returns true when the stream allocated its buffer (the destructor frees it
// only in that case).
template<class ImplTraits>
ANTLR_INLINE bool InputStream<ImplTraits>::get_isAllocated() const
{
	return this->m_isAllocated;
}
// Returns the current read position within the input buffer.
template<class ImplTraits>
ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_nextChar() const
{
	return this->m_nextChar;
}
// Returns the recorded size of the input buffer.
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_sizeBuf() const
{
	return this->m_sizeBuf;
}
// Returns the current line number (reset() starts counting at 1).
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_line() const
{
	return this->m_line;
}
// Returns the pointer recorded as the start of the current line.
template<class ImplTraits>
ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_currentLine() const
{
	return this->m_currentLine;
}
// Returns the character position within the current line.
template<class ImplTraits>
ANTLR_INLINE ANTLR_INT32 InputStream<ImplTraits>::get_charPositionInLine() const
{
	return this->m_charPositionInLine;
}
// Returns the current mark depth counter.
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_markDepth() const
{
	return this->m_markDepth;
}
// Returns a mutable reference to the markers container.
template<class ImplTraits>
ANTLR_INLINE typename InputStream<ImplTraits>::MarkersType& InputStream<ImplTraits>::get_markers()
{
	return this->m_markers;
}
// Returns the file name associated with this stream.
template<class ImplTraits>
ANTLR_INLINE const typename InputStream<ImplTraits>::StringType& InputStream<ImplTraits>::get_fileName() const
{
	return this->m_fileName;
}
// Returns the stored file number.
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_fileNo() const
{
	return this->m_fileNo;
}
// Returns the character currently used as the line separator.
template<class ImplTraits>
ANTLR_INLINE ANTLR_UCHAR InputStream<ImplTraits>::get_newlineChar() const
{
	return this->m_newlineChar;
}
// Returns the configured character width in bytes (setupInputStream() stores
// 0 for UTF-8 input).
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT8 InputStream<ImplTraits>::get_charByteSize() const
{
	return this->m_charByteSize;
}
// Returns the encoding identifier given to this stream.
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_encoding() const
{
	return this->m_encoding;
}
// Replaces the input buffer pointer; no other stream state is touched.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::set_data( DataType* data )
{
	this->m_data = data;
}
// Sets whether the destructor should free the input buffer.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::set_isAllocated( bool isAllocated )
{
	this->m_isAllocated = isAllocated;
}
// Moves the read position to the given pointer.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::set_nextChar( const DataType* nextChar )
{
	this->m_nextChar = nextChar;
}
// Records the size of the input buffer.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::set_sizeBuf( ANTLR_UINT32 sizeBuf )
{
	this->m_sizeBuf = sizeBuf;
}
// Overwrites the current line number.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::set_line( ANTLR_UINT32 line )
{
	this->m_line = line;
}
// Overwrites the pointer recorded as the start of the current line.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::set_currentLine( const DataType* currentLine )
{
	this->m_currentLine = currentLine;
}
// Overwrites the character position within the current line.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine )
{
	this->m_charPositionInLine = charPositionInLine;
}
// Overwrites the mark depth counter.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::set_markDepth( ANTLR_UINT32 markDepth )
{
	this->m_markDepth = markDepth;
}
// Replaces the markers container with a copy of the given one.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::set_markers( const MarkersType& markers )
{
	this->m_markers = markers;
}
// Replaces the stored file name; the base class stream name is not changed.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::set_fileName( const StringType& fileName )
{
	this->m_fileName = fileName;
}
// Overwrites the stored file number.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::set_fileNo( ANTLR_UINT32 fileNo )
{
	this->m_fileNo = fileNo;
}
// Sets the character used as the line separator.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::set_newlineChar( ANTLR_UCHAR newlineChar )
{
	this->m_newlineChar = newlineChar;
}
// Sets the character width in bytes.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::set_charByteSize( ANTLR_UINT8 charByteSize )
{
	this->m_charByteSize = charByteSize;
}
// Stores the encoding identifier; the stream is not re-configured by this call.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::set_encoding( ANTLR_UINT32 encoding )
{
	this->m_encoding = encoding;
}

// Advances the character position within the current line by one.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::inc_charPositionInLine()
{
	this->m_charPositionInLine += 1;
}

// Advances the line number by one.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::inc_line()
{
	this->m_line += 1;
}

// Increases the mark depth counter by one.
template<class ImplTraits>
ANTLR_INLINE void InputStream<ImplTraits>::inc_markDepth()
{
	this->m_markDepth += 1;
}

// Rewinds the stream to the start of its data: the read position and current
// line pointer go back to m_data, the line counter restarts at 1, the column
// at 0, and all markers are discarded.
template<class ImplTraits>
ANTLR_INLINE void	InputStream<ImplTraits>::reset()
{
	// Both the read position and the current line begin at the first character.
	this->m_nextChar    = this->m_data;
	this->m_currentLine = this->m_data;

	// Line numbers are 1 based, column positions 0 based.
	this->m_line               = 1;
	this->m_charPositionInLine = 0;

	// Forget any outstanding marks.
	this->m_markDepth = 0;
	this->m_markers.clear();
}

// Re-targets this stream at a new, caller owned buffer and rewinds it.
//
// inString  Start of the new buffer; the stream does not take ownership.
// size      Size of the new buffer.
// name      Optional stream name; NULL selects the placeholder "-memory-".
//
// The stream name and the file name are always updated together. Previously
// the file name was only refreshed when the stream name had been empty, so
// get_fileName() kept reporting the old name after a second reuse().
//
// NOTE(review): if the previous buffer was allocated by the stream
// (m_isAllocated was true) it is not freed here, matching the original
// behaviour - confirm whether callers rely on the old buffer staying alive.
template<class ImplTraits>
void    InputStream<ImplTraits>::reuse(ANTLR_UINT8* inString, ANTLR_UINT32 size, ANTLR_UINT8* name)
{
	// The new buffer belongs to the caller.
	m_isAllocated	= false;
	m_data		= inString;
	m_sizeBuf	= size;

	// Keep both names in sync; m_fileName is a copy, not an alias, of the
	// stream name and therefore has to be assigned explicitly.
	BaseType::m_streamName	= ((name == NULL) ? "-memory-" : (const char *)name);
	m_fileName		= BaseType::m_streamName;

	// Rewind to the start of the new data and drop all markers.
	this->reset();
}

/*
template<class ImplTraits>
typename InputStream<ImplTraits>::DataType*	InputStream<ImplTraits>::LT(ANTLR_INT32 lt)
{
	return this->LA(lt);
}
*/

// Returns the recorded size of the input buffer (same value as get_sizeBuf()).
template<class ImplTraits>
ANTLR_UINT32	InputStream<ImplTraits>::size()
{
	return this->m_sizeBuf;
}

// Returns the current read position encoded as a marker, i.e. the address
// held in m_nextChar converted to ANTLR_MARKER.
template<class ImplTraits>
ANTLR_MARKER	InputStream<ImplTraits>::index_impl()
{
	return reinterpret_cast<ANTLR_MARKER>(this->m_nextChar);
}


// Copies the text between two markers into a string.
//
// start  Marker (address) of the first character.
// stop   Marker (address) of the last character, inclusive.
//
// Returns a string of (stop - start) / sizeof(DataType) + 1 characters read
// from start. An inverted range (stop < start) yields an empty string:
// without that guard the difference is converted to an unsigned value for the
// division and can turn into an enormous length.
template<class ImplTraits>
typename InputStream<ImplTraits>::StringType	InputStream<ImplTraits>::substr(ANTLR_MARKER start, ANTLR_MARKER stop)
{
	// Nothing lies between the markers when stop precedes start.
	if	(stop < start)
	{
		return StringType();
	}

	// stop is inclusive, hence the + 1.
	const std::size_t len = static_cast<std::size_t>( (stop-start)/sizeof(DataType) + 1 );
	StringType str( (const char*)start, len );
	return str;
}

// Non-const overload: returns the current line number.
template<class ImplTraits>
ANTLR_UINT32	InputStream<ImplTraits>::get_line()
{
	return this->m_line;
}

// Returns the pointer recorded as the start of the current line.
template<class ImplTraits>
const typename InputStream<ImplTraits>::DataType*	InputStream<ImplTraits>::getLineBuf()
{
	return this->m_currentLine;
}

// Non-const overload: returns the character position within the current
// line, converted to ANTLR_UINT32.
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT32	InputStream<ImplTraits>::get_charPositionInLine()
{
	return this->m_charPositionInLine;
}

// Unsigned overload: overwrites the character position within the current line.
template<class ImplTraits>
ANTLR_INLINE void	InputStream<ImplTraits>::set_charPositionInLine(ANTLR_UINT32 position)
{
	this->m_charPositionInLine = position;
}

// Sets the character used as the line separator (called with '\n' by
// genericSetupStream()).
template<class ImplTraits>
void	InputStream<ImplTraits>::set_newLineChar(ANTLR_UINT32 newlineChar)
{
	this->m_newlineChar = newlineChar;
}

// Creates an empty lexer state: both pointers are NULL and both counters zero.
template<class ImplTraits>
ANTLR_INLINE LexState<ImplTraits>::LexState()
{
	// No position recorded yet.
	this->m_nextChar    = NULL;
	this->m_currentLine = NULL;

	// Counters start at zero.
	this->m_line               = 0;
	this->m_charPositionInLine = 0;
}

// Returns the saved read position.
template<class ImplTraits>
ANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_nextChar() const
{
	return this->m_nextChar;
}

// Returns the saved line number.
template<class ImplTraits>
ANTLR_INLINE ANTLR_UINT32 LexState<ImplTraits>::get_line() const
{
	return this->m_line;
}

// Returns the saved start-of-line pointer.
template<class ImplTraits>
ANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_currentLine() const
{
	return this->m_currentLine;
}

// Returns the saved character position within the line.
template<class ImplTraits>
ANTLR_INLINE ANTLR_INT32 LexState<ImplTraits>::get_charPositionInLine() const
{
	return this->m_charPositionInLine;
}

// Stores the read position.
template<class ImplTraits>
ANTLR_INLINE void LexState<ImplTraits>::set_nextChar( const DataType* nextChar )
{
	this->m_nextChar = nextChar;
}

// Stores the line number.
template<class ImplTraits>
ANTLR_INLINE void LexState<ImplTraits>::set_line( ANTLR_UINT32 line )
{
	this->m_line = line;
}

// Stores the start-of-line pointer.
template<class ImplTraits>
ANTLR_INLINE void LexState<ImplTraits>::set_currentLine( const DataType* currentLine )
{
	this->m_currentLine = currentLine;
}

// Stores the character position within the line.
template<class ImplTraits>
ANTLR_INLINE void LexState<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine )
{
	this->m_charPositionInLine = charPositionInLine;
}

// Returns this object viewed through its IntStreamType interface.
template<class ImplTraits>
ANTLR_INLINE typename InputStream<ImplTraits>::IntStreamType*	InputStream<ImplTraits>::get_istream()
{
	IntStreamType* const self = this;
	return self;
}

// Finishes the stream set-up for the encoding stored in m_encoding.
//
// For every encoding this calls setupIntStream() with the byte order of the
// running machine as first argument and, as second argument, the byte order
// chosen for the input (taken from a BOM, from the explicit BE/LE encoding
// constant, or defaulted to the machine's own order). It then records the
// character width through set_charByteSize(). A byte order mark found at the
// current read position is skipped for ENC_UTF8, ENC_UTF16 and ENC_UTF32.
//
// Fix: the UTF-32 little endian BOM (FF FE 00 00) used to be tested by reading
// offset +1 three times, which can never match; offsets +1, +2 and +3 are now
// examined.
//
// NOTE(review): the BOM probes read up to four bytes at m_nextChar without
// consulting m_sizeBuf - confirm that callers never pass shorter buffers.
template<class ImplTraits>
void InputStream<ImplTraits>::setupInputStream()
{
    // Work out the byte order of the machine we are running on by looking at
    // the first byte of a known 16 bit value. When the input uses the same
    // order, setupIntStream() receives two equal flags.
    //
    const ANTLR_UINT16 bomTest = 0xFEFF;
    const bool isBigEndian = (*((const ANTLR_UINT8*)(&bomTest)) == 0xFE);

    // Act on the encoding the user told us the input is in.
    //
    switch  (m_encoding)
    {
        case    ENC_UTF8:

            // UTF-8 has no byte order, but some producers (Windows .TXT files
            // for instance) still prefix the data with the EF BB BF signature.
            // Swallow it when present.
            //
            if  (       (*(m_nextChar))      == 0xEF
                    &&  (*(m_nextChar+1))    == 0xBB
                    &&  (*(m_nextChar+2))    == 0xBF
                )
            {
                m_nextChar += 3;
            }

            // Byte order is irrelevant here, so both flags are the same.
            // A character width of 0 is recorded for UTF-8.
            //
            this->setupIntStream( isBigEndian, isBigEndian );
            this->set_charByteSize(0);
            break;

        case    ENC_UTF16:
        {
            // Look for a BOM. Without one the input is assumed to be in the
            // natural order of this machine (or to really be UCS2).
            //
            const ANTLR_UINT8* const bom = (const ANTLR_UINT8*)m_nextChar;

            if  ( bom[0] == 0xFE && bom[1] == 0xFF )
            {
                // FE FF: big endian input. Step over the BOM.
                // NOTE(review): this advances by one DataType element -
                // presumably DataType is a 16 bit unit here; confirm.
                //
                m_nextChar += 1;
                this->setupIntStream( isBigEndian, true );
            }
            else if  ( bom[0] == 0xFF && bom[1] == 0xFE )
            {
                // FF FE: little endian input. Step over the BOM.
                //
                m_nextChar += 1;
                this->setupIntStream( isBigEndian, false );
            }
            else
            {
                // No BOM: assume the byte order of the local machine.
                //
                this->setupIntStream( isBigEndian, isBigEndian );
            }
            this->set_charByteSize(2);
            break;
        }

        case    ENC_UTF32:
        {
            // Look for a BOM. Without one the input is assumed to be in the
            // natural order of this machine.
            //
            const ANTLR_UINT8* const bom = (const ANTLR_UINT8*)m_nextChar;

            if  ( bom[0] == 0x00 && bom[1] == 0x00 && bom[2] == 0xFE && bom[3] == 0xFF )
            {
                // 00 00 FE FF: big endian input. Step over the BOM.
                // NOTE(review): this advances by one DataType element -
                // presumably DataType is a 32 bit unit here; confirm.
                //
                m_nextChar += 1;
                this->setupIntStream( isBigEndian, true );
            }
            else if  ( bom[0] == 0xFF && bom[1] == 0xFE && bom[2] == 0x00 && bom[3] == 0x00 )
            {
                // FF FE 00 00: little endian input. Step over the BOM.
                //
                m_nextChar += 1;
                this->setupIntStream( isBigEndian, false );
            }
            else
            {
                // No BOM: assume the byte order of the local machine.
                //
                this->setupIntStream( isBigEndian, isBigEndian );
            }
            this->set_charByteSize(4);
            break;
        }

        case    ENC_UTF16BE:

            // Explicitly big endian; no BOM is looked for.
            //
            this->setupIntStream( isBigEndian, true );
            this->set_charByteSize(2);
            break;

        case    ENC_UTF16LE:

            // Explicitly little endian; no BOM is looked for.
            //
            this->setupIntStream( isBigEndian, false );
            this->set_charByteSize(2);
            break;

        case    ENC_UTF32BE:

            // Explicitly big endian; no BOM is looked for.
            //
            this->setupIntStream( isBigEndian, true );
            this->set_charByteSize(4);
            break;

        case    ENC_UTF32LE:

            // Explicitly little endian; no BOM is looked for.
            //
            this->setupIntStream( isBigEndian, false );
            this->set_charByteSize(4);
            break;

        case    ENC_EBCDIC:

            // EBCDIC is set up exactly like the 8 bit case: single byte
            // characters, no byte order difference.
            //
            this->setupIntStream( isBigEndian, isBigEndian );
            this->set_charByteSize(1);
            break;

        case    ENC_8BIT:
        default:

            // Standard 8bit/ASCII; also the fallback for unknown encodings.
            //
            this->setupIntStream( isBigEndian, isBigEndian );
            this->set_charByteSize(1);
            break;
    }
}

}