aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/libmagic/magic/Magdir/rtf
blob: 48a1f28af4676caa021f704e3b89fc37da21bdc9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#------------------------------------------------------------------------------
# $File: rtf,v 1.9 2020/12/12 20:01:47 christos Exp $
# rtf:	file(1) magic for Rich Text Format (RTF)
#
# Duncan P. Simpson, D.P.Simpson@dcs.warwick.ac.uk
# Update:	Joerg Jenderek
# URL:		https://en.wikipedia.org/wiki/Rich_Text_Format
# Reference:	http://www.snake.net/software/RTF/RTF-Spec-1.7.rtf
#		http://www.kleinlercher.at/tools/Windows_Protocols/Word2007RTFSpec9.pdf
# Top-level RTF signature: the file must start with the literal text {\rtf
# (the doubled backslash in the pattern is the magic escape for one backslash).
0	string		{\\rtf
# skip DROID fmt-355-signature-id-522.rtf by looking for valid version
# (the byte at offset 5, right after the keyword, must not be 0xAB)
>5	ubyte		!0xAB
# skip also \ in DROID fmt-50-signature-id-158.rtf by looking for valid version
# (0x5C is the backslash); the description is printed only when both checks pass
>>5	ubyte		!0x5C		Rich Text Format data
!:mime	text/rtf
!:apple	????RTF
!:ext	rtf
# append version, character set, code page and language via the shared
# rtf-info entry, invoked at offset 0
>>>0	use		rtf-info
#	display information like version, language and code page of RTF
#	Shared sub-entry invoked with "use" by the {\rtf, {\urtf and {\pwd entries
#	of this file. Its offsets are applied on top of the offset given on the
#	calling "use" line, so offset 5 is meant to be the byte right after the
#	signature keyword. Every message starts with \b so that it is appended
#	without a separating space.
0	name		rtf-info
# 1 mostly, 2 for newer Pocket Word documents, space for test like fdo78502.rtf, { for some urtf
# (0x7b is '{'; any other byte is printed as the version character)
>5	ubyte		!0x7b		\b, version %c
# The word for character set must precede any text or most other control words
>6	string		\\mac		\b, Apple Macintosh
>6	string		\\pc
# control word \pca
# (0x61 is 'a': \pca selects code page 850, anything else after \pc means 437)
>>9	ubyte		=0x61		\b, IBM PS/2, code page 850
>>9	ubyte		!0x61		\b, IBM PC, code page 437
# unknown character set or ANSI later after control words like
# \adeflang1025 \info \title \author \category \manager
# "Burow, Steffanie - Im Tal des Schneeleoparden.rtf"
# (older, narrower search window kept for reference)
#>6	search/105	\\ansi		\b, ANSI
>6	search/502	\\ansi		\b, ANSI
# fallback message for the case that no character set test matched
# NOTE(review): magic(5) says "default" only fires when no other test at the
# same continuation level has matched before; the version test at >5 is at
# this level too, so this line may rarely be reached -- confirm.
>6	default		x		\b, unknown character set
# look for explicit codepage keyword
# "Burow, Steffanie - Im Tal des Schneeleoparden.rtf"
# (older, narrower search window kept for reference)
#>5	search/110	\\ansicpg
>5	search/500	\\ansicpg
# skip unknown or buggy codepage string 0 like in fdo78502.rtf
# (0x30 is '0'; the byte tested is the one following the matched keyword)
>>&0	ubyte		!0x30		\b, code page
# codepage string: 437~United States IBM, ..., 1252~WesternEuropean, ..., 57011~Punjabi
# (step back one byte to the first digit and print at most 3 characters)
>>>&-1		string	x		%-.3s
# skip space or \ and display possible 4th digit of code page string
# (the pair ">0x2F" then "<0x3A" on the same byte accepts only '0'..'9')
>>>&2		ubyte	>0x2F
>>>>&-1		ubyte	<0x3A		\b%c
# possible 5th digit of code page string
# (same digit range check, applied to the next byte)
>>>>>&0		ubyte	>0x2F
>>>>>>&-1	ubyte	<0x3A		\b%c
# look again at version byte to use default clause
# (test "x" always succeeds; it only opens a new ">>" level for the entries
# below, so that the "default" there refers to the \adeflang search alone)
>5	ubyte		x
# Default language ID for South Asian/Middle Eastern text
# language ID: 1025, ..., 1065~Persian, ..., 2057~English_UnitedKingdom, ..., 58380~French_NorthAfrica
# Readme-0.72-Persian.rtf
# (older, narrower search window kept for reference)
#>6	search/1	\\adeflang	\b, default middle east language ID
>>6	search/497	\\adeflang	\b, default middle east language ID
# https://docs.microsoft.com/en-us/openspecs/office_standards/ms-oe376/6c085406-a698-4e12-9d4d-c3b0ee3dbc4a
# (print at most 4 characters following the keyword)
>>>&0	string		x		%.4s
# skip \ and NL and show possible 5th digit of language string
# (printed only if the byte lies in the range '0'..'9')
>>>&4	ubyte		>0x2F
>>>>&-1	ubyte		<0x3A		\b%c
# else look for default language to be used when the \plain control word is encountered
# (taken when the \adeflang search above did not match)
>>6	default		x
# "Burow, Steffanie - Im Tal des Schneeleoparden.rtf"
# (older, narrower search window kept for reference)
#>>>6	search/127	\\deflang
>>>6	search/505	\\deflang
# the text after the keyword must compare greater than the string "0";
# at most 4 characters of it are printed
>>>>&0	string		>0		\b, default language ID %-.4s
# possible 5th digit of language string
# (printed only if the byte lies in the range '0'..'9')
>>>>&4		ubyte	>0x2F
>>>>>&-1	ubyte	<0x3A		\b%c

# Reference:	http://latex2rtf.sourceforge.net/rtfspec_63.html
# Note:		no real world example found
# Variant signature {\urtf; it is one character longer than {\rtf.
0	string		{\\urtf		Rich Text Format unicoded data
!:mime	text/rtf
# Apple type/creator hint left disabled for this variant
#!:apple	????RTF
!:ext	rtf
# rtf-info is invoked at offset 1 (not 0) to compensate for the extra
# character, so that its offset 5 lands on the byte after the keyword
>1	use		rtf-info

# URL:		https://en.wikipedia.org/wiki/Microsoft_Word
# Reference:	http://fileformats.archiveteam.org/wiki/Microsoft_Word
# Note:	called by TrID "Pocket Word document"
#	by PlanMaker "Pocket Word-Handheld PC" for pwd
#	by PlanMaker "Pocket Word-Pocket PC" for psw
# Signature {\pwd has the same length as {\rtf.
0	string		{\\pwd		Pocket Word document or template
# by SoftMaker Office	http://extension.nirsoft.net/pwd
# (alternative MIME type, left disabled)
#!:mime	application/msword
# https://reposcope.com/mimetype/application/x-pocket-word
!:mime	application/x-pocket-word
# PWD for Handheld PC variant and PSW for Pocket PC variant
# PWT for template
!:ext	pwd/psw/pwt
# same keyword length as {\rtf, so rtf-info is invoked at offset 0
>0	use		rtf-info