[Eng] CVE-2021-39863 Analysis
CVE-2021-39863
An out-of-bounds read/write and heap buffer overflow vulnerability that occurs while concatenating two URLs with different encodings in Adobe
- Affected Products
- We used 32-bit Adobe Acrobat Reader DC 21.005.20048.43252 for the exploit.
- You can download the Adobe with this version at this link
Analysis
1. Find the Vulnerable Function
- This vulnerability occurs during URL concatenating, so we debugged IA32.api in Adobe Acrobat Reader DC.
-
The referenced exploit blogs do not explain how to locate the vulnerable function, so we located it in three different ways.
- 3 ways to find vulnerable function
- Cross-Reference Check
-
According to the Exodus blog, a function called from the vulnerable function calls the
strncat
function to concatenate the URLs. - IA32.api uses the
strncat
function by importing from other modules.- The names of the functions imported from other modules are stored in the
.rdata
segment and used when this function calls.
- The names of the functions imported from other modules are stored in the
- Like [Fig 1] and [Fig 2], we found the vulnerable function by following the cross-reference from the string
"strncat"
stored in the .rdata
segment.
[Fig 1] Find the string
"strncat"
and find the cross-reference function of it.
[Fig 2] Check the cross-reference function of the function which was found in [Fig 1]
-
- Search as OpCode
-
- The vulnerable function checks for the UTF-16 BOM to detect UTF-16BE encoding.
- For example, it compares a 1-byte value with
\xFF
- We searched for the opcode that compares a 1-byte register with
\xFF
and located the vulnerable function from the matches of that opcode.
[Fig 3] Find the vulnerable function by Searching OpCode
-
- Use IDC IDA Script
- We found out from the Exodus blog that the vulnerable function has many instructions.
- Based on this, we thought we could find the vulnerable function easily by searching the number of instructions.
- We found the vulnerable functions by programming scripts to find functions with more than a specific number of instructions with IDC, the built-in programming language of IDA.
// IDC script: list every function whose instruction count exceeds a threshold
// (400 here). Used to spot the unusually large vulnerable function.
#include <idc.idc>

static main()
{
    auto fn_ea, fn_end, n_insn, cur;

    Message("================ START =================\n");

    // Walk all functions known to the database, starting from address 0.
    for (fn_ea = NextFunction(0); fn_ea != BADADDR; fn_ea = NextFunction(fn_ea)) {
        // Extra guard kept from the original script; nothing is reported
        // for such an address.
        if (fn_ea == -1) {
            continue;
        }

        fn_end = GetFunctionAttr(fn_ea, FUNCATTR_END);

        // Count code items from the function start up to its end address.
        // The start address itself accounts for the first increment.
        n_insn = 0;
        for (cur = fn_ea; cur < fn_end; cur = FindCode(cur, SEARCH_DOWN | SEARCH_NEXT)) {
            n_insn++;
        }

        if (n_insn > 400) {
            Message("%s contains %d instructions\n" , Name(fn_ea), n_insn);
        }
    }
}
[Fig 4] Find the vulnerable function with IDC IDA Script
- Cross-Reference Check
2. Analysis of the Vulnerable Function
Decompiled Code for the Vulnerable Function in IA32.api
// ExploitPoint - decompiled (Hex-Rays style) URL-combining routine from IA32.api.
//
// Per the surrounding write-up, Source is the base URL and lpString is the
// relative URL. The routine measures both inputs, copies them into freshly
// allocated buffers, runs them through check_modify_URL() and
// URLparse_process(), and on one path builds a concatenated URL in a third
// buffer ([4]-[6]); that last step is where the out-of-bounds access happens.
//
// Parameters:
//   Source   - base URL, inspected as raw bytes; a leading 0xFE 0xFF selects
//              the strlen_UTF16BE() length path.
//   lpString - relative URL; the same 0xFE 0xFF test is applied.
//   String   - handed to sub_25803194() together with the selected parse
//              record (presumably the output buffer - TODO confirm).
//   a4       - in/out size: the needed length + 1 is compared against *a4;
//              on overflow *a4 receives the needed size, otherwise the length.
//   a5       - status out-parameter; mandatory (returns 0 at once if NULL).
//              Values written here: 0, -2, -3, -7, or a non-zero
//              URLparse_process() result.
//
// Returns v5: 1 unless a failure path resets it to 0; the early exits
// return 0 directly.
//
// NOTE(review): v42..v49, v50..v62 and v63..v74 sit at contiguous stack
// offsets and are passed by their first element to URLparse_process() /
// sub_25802FAC(); they are presumably three instances of one parse-record
// struct (base, relative, combined) that the decompiler split into scalars.
__int16 __cdecl ExploitPoint(wchar_t *Source, CHAR *lpString, char *String, _DWORD *a4, int *a5)
{
__int16 v5; // di
wchar_t *concatURL_addr; // ebx
CHAR *lpString_copy; // eax
CHAR v8; // dl
__int64 len_lpString; // rax
wchar_t *Source_copy; // ecx
__int64 len_Source; // rax
int v12; // eax
int totallen_Source; // eax
int len_Source_notUTF; // eax
CHAR *allocadr_Source; // eax
wchar_t *v16; // ecx
int totallen_lpString; // eax
int len_lpString_notUTF; // eax
CHAR *allocadr_lpString; // eax
int v20; // eax
int v21; // edx
int v22; // edx
_DWORD *v23; // eax
int v24; // ecx
int *v25; // eax
int v26; // ecx
int v27; // eax
int v28; // ecx
int v29; // eax
wchar_t *v30; // ecx
int v31; // eax
int len_allocaddr; // eax
int v33; // eax
int v34; // ecx
int v35; // edx
wchar_t *v37; // [esp-4h] [ebp-F4h]
unsigned int v38; // [esp-4h] [ebp-F4h]
wchar_t *v39; // [esp-4h] [ebp-F4h]
unsigned int v40; // [esp-4h] [ebp-F4h]
unsigned int v41; // [esp-4h] [ebp-F4h]
int v42[7]; // [esp+Ch] [ebp-E4h] BYREF
int v43; // [esp+28h] [ebp-C8h]
int v44; // [esp+2Ch] [ebp-C4h]
int v45; // [esp+30h] [ebp-C0h]
int v46; // [esp+34h] [ebp-BCh]
wchar_t *v47; // [esp+38h] [ebp-B8h]
__int64 v48; // [esp+3Ch] [ebp-B4h]
int v49; // [esp+4Ch] [ebp-A4h]
int v50[3]; // [esp+50h] [ebp-A0h] BYREF
int v51; // [esp+5Ch] [ebp-94h]
int v52; // [esp+60h] [ebp-90h]
int v53; // [esp+64h] [ebp-8Ch]
int v54; // [esp+68h] [ebp-88h]
int v55; // [esp+6Ch] [ebp-84h]
int v56; // [esp+70h] [ebp-80h]
int v57; // [esp+74h] [ebp-7Ch]
int v58; // [esp+78h] [ebp-78h]
char *v59; // [esp+7Ch] [ebp-74h]
__int64 v60; // [esp+80h] [ebp-70h]
__int64 v61; // [esp+88h] [ebp-68h]
int v62; // [esp+90h] [ebp-60h]
int v63[3]; // [esp+94h] [ebp-5Ch] BYREF
int v64; // [esp+A0h] [ebp-50h]
int v65; // [esp+A4h] [ebp-4Ch]
int v66; // [esp+A8h] [ebp-48h]
int v67; // [esp+ACh] [ebp-44h]
int v68; // [esp+B0h] [ebp-40h]
int v69; // [esp+B4h] [ebp-3Ch]
int v70; // [esp+B8h] [ebp-38h]
int v71; // [esp+BCh] [ebp-34h]
void *v72; // [esp+C0h] [ebp-30h]
__int128 v73; // [esp+C4h] [ebp-2Ch]
int v74; // [esp+D4h] [ebp-1Ch]
int iMaxLength[2]; // [esp+D8h] [ebp-18h]
LPCSTR allocadr_lpString_copy; // [esp+E0h] [ebp-10h]
LPCSTR allocadr_Source_copy; // [esp+E4h] [ebp-Ch]
int v78[2]; // [esp+E8h] [ebp-8h] BYREF
// The three buffer pointers start out null so the common exit (LABEL_87)
// can tell which ones were actually allocated.
allocadr_Source_copy = 0;
allocadr_lpString_copy = 0;
v5 = 1;
*(_QWORD *)v78 = 0i64;
*(_QWORD *)iMaxLength = 0i64;
concatURL_addr = 0;
v49 = 0;
v62 = 0;
v74 = 0;
// a5 is mandatory: without it there is nowhere to report a status.
if(!a5) return 0;
*a5 = 0;
// [1-1] get the length of relative URL
lpString_copy = lpString;
// Only a relative URL that starts with the bytes 0xFE 0xFF is measured with strlen_UTF16BE().
if(lpString && *lpString && (v8 = lpString[1]) != 0 && *lpString == (CHAR)0xFE && v8 == (CHAR)0xFF) {
len_lpString = ((__int64 (__cdecl *)(CHAR *))strlen_UTF16BE)(lpString);
v78[1] = len_lpString;
// Both 32-bit halves all ones <=> the 64-bit result is -1, the failure value of strlen_UTF16BE().
if ((HIDWORD(len_lpString)&(unsigned int)len_lpString) == -1) {
// Shared "bad length" exit; no buffer has been allocated yet when it is reached.
LABEL_9:
*a5 = -2;
return 0;
}
lpString_copy = lpString;
} else {
// No BOM: v78[1] takes v78[0], which was zeroed above.
v78[1] = v78[0];
}
// [1-2] get the length of base URL
Source_copy = Source;
// Any missing argument is reported as -2 through the common failure exit.
if(!Source || !lpString_copy || !String || !a4) {
*a5 = -2;
goto LABEL_86;
}
// First byte is not 0xFE: go straight to the custom_strlen() + 1 sizing at LABEL_25.
if(*(_BYTE *)Source != 0xFE) goto LABEL_25;
if(*((_BYTE *)Source+1) == 0xFF) {
len_Source = ((__int64 (__cdecl *)(wchar_t *))strlen_UTF16BE)(Source);
iMaxLength[1] = len_Source;
if((HIDWORD(len_Source)&(unsigned int)len_Source) == -1) goto LABEL_9;
Source_copy = Source;
v12 = iMaxLength[1];
} else {
// 0xFE without a following 0xFF: v12 takes iMaxLength[0], zeroed above.
v12 = iMaxLength[0];
}
// BOM present: total size is the measured length + 2.
if(*(_BYTE *)Source_copy == 0xFE && *((_BYTE *)Source_copy+1) == 0xFF) {
totallen_Source = v12 + 2;
} else {
LABEL_25:
len_Source_notUTF = (int)custom_strlen((LPCSTR)Source_copy);
// NOTE(review): v37 is never assigned in this listing - presumably a
// decompiler artefact for a value left in a register/stack slot; confirm
// against the disassembly.
Source_copy = v37;
totallen_Source = len_Source_notUTF + 1;
}
iMaxLength[1] = totallen_Source;
// [2-1] store base URL to new heap
// NOTE(review): `@<e>` in the cast below looks like a truncated `@<ecx>`
// (compare the calloc_guess casts at [2-2] and [4]).
allocadr_Source = (CHAR *)((int (__usercall *)@<eax>(wchar_t *@<e>, int, int))calloc_guess)(Source_copy, 1, totallen_Source);
allocadr_Source_copy = allocadr_Source;
// Allocation failure is reported as -7; nothing needs freeing at this point.
if(!allocadr_Source) {
*a5 = -7;
return 0;
}
((void (__usercall *)(unsigned int@<ecx>, wchar_t *, wchar_t *, int))custom_strncpy)(v38, (wchar_t *)allocadr_Source, Source, iMaxLength[1]);
// Size the relative URL the same way: BOM => measured length + 2, otherwise custom_strlen() + 1.
if(*lpString==(CHAR)0xFE && lpString[1]==(CHAR)0xFF) {
totallen_lpString = v78[1] + 2;
} else {
len_lpString_notUTF = (int)custom_strlen(lpString);
// NOTE(review): v39 is likewise never assigned in this listing (see v37).
v16 = v39;
totallen_lpString = len_lpString_notUTF + 1;
}
v78[1] = totallen_lpString;
// [2-2] store relative URL to new heap
allocadr_lpString = (CHAR *)((int (__usercall *)@<eax>(wchar_t *@<ecx>, int, int))calloc_guess)(v16, 1, totallen_lpString);
allocadr_lpString_copy = allocadr_lpString;
if(!allocadr_lpString) {
*a5 = -7;
// Common failure exit: force the return value to 0, then fall into the cleanup at LABEL_87.
LABEL_86:
v5 = 0;
goto LABEL_87;
}
((void (__usercall *)(unsigned int@<ecx>, wchar_t *, wchar_t *, int))custom_strncpy)(v40, (wchar_t *)allocadr_lpString, (wchar_t *)lpString, v78[1]);
// Both copies must pass check_modify_URL(); it receives a5, so it presumably sets the status itself.
if(!(unsigned __int16)check_modify_URL((int)allocadr_Source_copy, iMaxLength[1], a5) || !(unsigned __int16)check_modify_URL((int)allocadr_lpString_copy, v78[1], a5)) {
goto LABEL_86;
}
// [3] Perform URL-related operations
// Parse the base copy into the record starting at v42 and the relative copy
// into the record starting at v50; a non-zero result becomes the status.
v20 = URLparse_process((CHAR *)allocadr_Source_copy, v42);
if(v20 || (v20 = URLparse_process((CHAR *)allocadr_lpString_copy, v50)) != 0) {
*a5 = v20;
goto LABEL_86;
}
// Empty base URL, or the first fields of the two records differ (and the
// relative one is not 5): emit the relative record (v50) on its own.
if(!*(_BYTE *)Source || (v21 = v42[0], v50[0] != 5) && v50[0] != v42[0]) {
v35 = sub_25802FAC((int)v50);
v23 = a4;
v24 = v35 + 1;
if(v35 + 1 > *a4) goto LABEL_44;
*a4 = v35;
v25 = v50;
goto LABEL_82;
}
// Non-empty relative URL: assemble the combined record v63.. from pieces of
// the base record (v42..) and the relative record (v50..).
if(*lpString) {
v26 = v55;
v63[1] = v42[1];
v63[2] = v42[2];
v27 = v51;
v63[0] = v42[0];
v73 = 0i64;
if(!v51 && !v53 && !v55) {
if(sub_25803155(v50)) {
v28 = v44;
v64 = v42[3];
v65 = v42[4];
v66 = v42[5];
v67 = v42[6];
v29 = v43;
if(v49 == 1) {
v29 = v43 + 2;
v28 = v44 - 1;
v43 += 2;
--v44;
}
v69 = v28;
v68 = v29;
v70 = v45;
// v58 / v59 belong to the relative record and v46 / v47 to the base record;
// they are used below as length / pointer pairs.
if(v58) {
if (*v59 != '/') {
// [4] Allocate new heap to store connected URL
// Requested size is v58 + 1 + v46 bytes.
concatURL_addr = (wchar_t *)((int (__usercall *)@<eax>(wchar_t *@<ecx>, int, int))calloc_guess)((wchar_t *)(v58 + 1), 1, v58 + 1 + v46);
if(!concatURL_addr) {
v23 = a4;
v24 = v58 + v46 + 1;
goto LABEL_44;
}
if(v46) {
// [5] Store base URL to heap memory which is allocated at [4]
((void (__usercall *)(unsigned int@<ecx>, wchar_t *, wchar_t *, int))custom_strncpy)(v41, concatURL_addr, v47, v46 + 1);
// If the copied part does not end in '/', sub_25818D6E() is called with '/'
// (presumably a search for the last '/'); the byte after the hit is zeroed,
// or the whole buffer is emptied when there is no hit.
if (*((_BYTE *)concatURL_addr + v46 - 1) != '/') {
v31 = ((int (__usercall *)@<eax>(wchar_t *@<ecx>, char *, int))sub_25818D6E)(v30, (char *)concatURL_addr, '/');
if(v31) *(_BYTE *)(v31 + 1) = 0;
else *(_BYTE *)concatURL_addr = 0;
}
}
// [6] Concatenate relative URL after the base URL stored at [5] : OOB R/W and Heap BOF occurs
// The append goes through custom_strncat() -> custom_strcat(), which (per
// the custom_strcat listing in this write-up) picks the 2-byte copy loop
// from the destination's BOM alone. With a BOM-prefixed base part and a
// byte-string v59 the loop only stops at a 00 00 pair, so it can read past
// v59 and write past the buffer sized at [4].
if(v58) {
len_allocaddr = (int)custom_strlen((LPCSTR)concatURL_addr);
((void (__usercall *)(uintptr_t@<ecx>, char *, char *, int))custom_strncat)(v58 + 1, (char *)concatURL_addr, v59, v58 + 1 + len_allocaddr);
}
sub_25802E0C((int)concatURL_addr, 0);
v71 = (int)custom_strlen((LPCSTR)concatURL_addr);
v72 = concatURL_addr;
goto LABEL_75;
}
// v59 starts with '/': its length / pointer pair is used as-is.
v71 = v58;
v72 = v59;
} else {
// v58 == 0: fall back to the base record's pair (v46 / v47).
v71 = v46;
v72 = v47;
if((_DWORD)v60) goto LABEL_75;
*(_QWORD *)&v73 = v48;
}
LABEL_74:
if((_DWORD)v73) {
LABEL_77:
if ( (int)v61 > 0 ) *((_QWORD *)&v73 + 1) = v61;
// Size check for the combined record: too small => *a4 = needed size and
// status -3 (LABEL_45); otherwise *a4 = length and the record is emitted.
v34 = sub_25802FAC((int)v63);
if(v34+1 > *a4) {
*a4 = v34 + 1;
goto LABEL_45;
}
*a4 = v34;
v25 = v63;
goto LABEL_82;
}
LABEL_75:
if((int)v60 > 0) *(_QWORD *)&v73 = v60;
goto LABEL_77;
}
v26 = v55;
v21 = v42[0];
v27 = v51;
}
// Reached when any of v51 / v53 / v55 is non-zero or sub_25803155(v50)
// returned 0: fields v64..v72 are taken from the relative record instead.
v64 = v27;
v65 = v52;
v66 = v53;
v67 = v54;
v33 = v56;
if(v62 == 1) {
v26 += 2;
v33 = v56 - 1;
v55 = v26;
--v56;
}
v69 = v33;
v68 = v26;
v71 = v58;
v70 = v57;
v72 = v59;
if(v57) goto LABEL_75;
v78[1] = 0;
if(!sub_25802C93(v21, &v78[1])) goto LABEL_75;
v70 = v78[1];
goto LABEL_74;
}
// Empty relative URL: emit the base record (v42) on its own.
v22 = sub_25802FAC((int)v42);
v23 = a4;
v24 = v22 + 1;
if(v22+1 > *a4) {
// Output too small (or allocation at [4] failed): report the needed size
// through v23 (== a4) and status -3.
LABEL_44:
*v23 = v24;
LABEL_45:
*a5 = -3;
goto LABEL_86;
}
*a4 = v22;
v25 = v42;
// Success exit: sub_25803194() receives the chosen record and String.
LABEL_82:
sub_25803194((int)v25, String);
// Common cleanup: release whichever buffers were allocated, through the
// function pointer stored at dword_25824098 + 12 (presumably the matching
// free routine - TODO confirm).
LABEL_87:
if(allocadr_Source_copy)
(*(void (__cdecl **)(LPCSTR))(dword_25824098 + 12))(allocadr_Source_copy);
if(allocadr_lpString_copy)
(*(void (__cdecl **)(LPCSTR))(dword_25824098 + 12))(allocadr_lpString_copy);
if(concatURL_addr)
(*(void (__cdecl **)(wchar_t *))(dword_25824098 + 12))(concatURL_addr);
return v5;
}
You need to understand the following functions to understand the root cause.
ExploitPoint()
: The function where the analysis starts
- Parameter
_BYTE *Source
: base URL
_BYTE *lpString
: relative URL
- The necessary steps for the exploit can be summarized as follows (You can find these steps in the previous code in toggle,
Decompiled Code for the Vulnerable Function in IA32.api
):- Get the length of the URLs
[1-1] Get the length of the relative URL
[1-2] Get the length of the base URL
- Store the URLs in the Heap memory
[2-1] Store the base URL in the new Heap memory
[2-2] Store the relative URL in the new Heap memory
- [3] Perform URL-related operations (this step is not directly related to the exploit)
- [4] Allocate new Heap memory to store the concatenated URL
- [5] Store the base URL in the Heap memory allocated at [4]
- [6] Concatenate the relative URL after the base URL stored at [5] : OOB R/W and Heap BOF occurs
- Get the length of the URLs
[1-1] Get the length of the relative URL
[1-2] Get the length of the base URL
- Parameter
strlen_UTF16BE
: Calculates the length of a string encoded as UTF-16BE
- Parameter
char *string
: the UTF-16BE encoded string whose length is calculated
- return value : The number of bytes of
string
except the Null Terminator
/*
 * strlen_UTF16BE - byte length of a BOM-prefixed UTF-16BE string.
 *
 * string must start with the bytes 0xFE 0xFF (compared as -2 / -1, i.e. the
 * listing relies on `char` being signed). The string is walked two bytes at
 * a time until a 00 00 pair is found.
 *
 * Returns the number of bytes before the 00 00 terminator (the 2-byte BOM is
 * counted), or -1 when string is NULL, has no BOM, or a pair with exactly
 * one zero byte is met before the terminator.
 */
int __cdecl strlen_UTF16BE(char *string)
{
    char *cursor = string;
    char first = 0;   /* first byte of the current pair  */
    char second = 0;  /* second byte of the current pair */
    int nbytes = 0;

    if (!string || string[0] != -2 || string[1] != -1) {
        return -1;
    }

    for (;;) {
        first = *cursor;
        ++cursor;
        if (!cursor) {          /* pointer check kept from the decompiler output */
            break;
        }
        second = *cursor;
        ++cursor;

        if (!first) {
            /* 00 xx pair: a valid end only when the second byte is zero too. */
            return second ? -1 : nbytes;
        }
        if (!second) {
            break;              /* xx 00 pair: rejected below */
        }

        nbytes += 2;
        if (!cursor) {          /* pointer check kept from the decompiler output */
            break;
        }
    }

    if (first) {
        return -1;
    }
    return second ? -1 : nbytes;
}
- Parameter
custom_strlen()
: a strlen
variant that also works for UTF-16BE encoded strings
- Parameter
LPCSTR lpString
: UTF-16BE encoded lpString
to calculate the length
- return value : The number of bytes except the Null Terminator
/*
 * strlen_UTF16BE_ - length helper referred to as custom_strlen() in the text.
 *
 * Walks string two bytes at a time and stops at the first pair whose two
 * bytes are both zero. No BOM check is performed here.
 *
 * Returns the number of bytes before that 00 00 pair, or 0 for a NULL input.
 */
int __cdecl strlen_UTF16BE_(char *string)
{
    int nbytes = 0;

    if (!string) {
        return 0;
    }

    /* The second byte of a pair is only read when the first one is zero. */
    while (string[nbytes] || string[nbytes + 1]) {
        nbytes += 2;
    }
    return nbytes;
}
- Parameter
calloc_guess()
: an allocation function, presumably calloc
in C
custom_strncpy()
: a strncpy
variant that also works for UTF-16BE encoded strings
- Parameter
wchar_t *Destination
: The address where the copied string is stored
wchar_t *Source
: The string to copy
unsigned int iMaxLength
: The maximum length to copy, in bytes
- return value :
Destination
/*
 * custom_strncpy - bounded copy that switches on the source's BOM.
 *
 * Destination - receives the copy.
 * Source      - string to copy; a leading 0xFE 0xFF selects the wide path.
 * iMaxLength  - size limit in bytes (halved for the wide path).
 *
 * A NULL pointer or zero length goes through the handler stored at
 * dword_258240A4 + 4 with code 0x40000003 and then throws an int 0 via
 * CxxThrowException.
 *
 * Returns the value of wcsncpy() on the wide path, otherwise the value of
 * lstrcpynA(); on the byte path the last byte of the destination window is
 * forced to zero as well.
 */
wchar_t *__cdecl custom_strncpy(wchar_t *Destination, wchar_t *Source, unsigned int iMaxLength)
{
    wchar_t *copied; // eax
    int thrown; // [esp+Ch] [ebp-4h] BYREF

    if (!(Destination && Source && iMaxLength)) {
        (*(void (__thiscall **)(_DWORD, int))(dword_258240A4 + 4))(*(_DWORD *)(dword_258240A4 + 4), 0x40000003);
        thrown = 0;
        CxxThrowException(&thrown, (_ThrowInfo *)&_TI1H);
    }

    /* BOM present: copy wide characters, so the byte budget is halved. */
    if (((_BYTE *)Source)[0] == 0xFE && ((_BYTE *)Source)[1] == 0xFF) {
        return wcsncpy(Destination, Source, iMaxLength >> 1);
    }

    /* Byte string: bounded ANSI copy, then terminate the last byte explicitly. */
    copied = (wchar_t *)lstrcpynA((LPSTR)Destination, (LPCSTR)Source, iMaxLength);
    ((_BYTE *)Destination)[iMaxLength - 1] = 0;
    return copied;
}
- Parameter
custom_strncat()
: a strncat
variant that also works for UTF-16BE encoded strings
- Parameter
char *Destination
, char *Source
, int maxlength
- Concatenates the
Source
string after the Destination
string
- Calls
custom_strcat(Destination, Source);
via the second if
statement in the normal case
/*
 * custom_strncat - append Source to Destination within maxlength bytes.
 *
 * Destination - string to append to.
 * Source      - string to append.
 * maxlength   - size limit used for the fit check and for truncation.
 *
 * A NULL pointer or zero maxlength goes through the handler stored at
 * dword_258240A4 + 4 with code 0x40000003 and then throws via
 * CxxThrowException.
 *
 * Returns 1 when both custom_strlen() results add up to at most
 * maxlength - 1 and custom_strcat() did the append; returns 0 when the
 * append was truncated with strncat() and the last byte forced to zero.
 */
int __cdecl custom_strncat(char *Destination, char *Source, int maxlength)
{
    /* One stack slot, as in the decompiler output: it is first the thrown
     * object and afterwards holds custom_strlen(Destination). */
    LPCSTR dest_len; // [esp+10h] [ebp-4h] BYREF

    if (!(Destination && Source && maxlength)) {
        (*(void (__thiscall **)(_DWORD, int))(dword_258240A4 + 4))(*(_DWORD *)(dword_258240A4 + 4), 0x40000003);
        dest_len = 0;
        CxxThrowException(&dest_len, (_ThrowInfo *)&_TI1H);
    }

    dest_len = custom_strlen(Destination);

    /* Does not fit: truncate with strncat and terminate the last byte. */
    if (&custom_strlen(Source)[(int)dest_len] > (const CHAR *)(maxlength - 1)) {
        strncat(Destination, Source, maxlength - (_DWORD)dest_len - 1);
        Destination[maxlength - 1] = 0;
        return 0;
    }

    /* Normal case: the lengths fit, so the unbounded custom_strcat is used. */
    custom_strcat(Destination, Source);
    return 1;
}
- Parameter
custom_strcat()
: a strcat
variant that also works for UTF-16BE encoded strings. The vulnerability occurs directly in this function.
- Parameter
LPSTR lpString1
: base URL
LPCSTR lpString2
: relative URL
-
If the base URL is encoded as UTF-16BE, then this function copies 2 bytes at a time from the relative URL to after the base URL until
"\x00\x00"
is reached.
- This means the program assumes that
lpString1
and lpString2
were encoded as UTF-16BE, and proceeds with URL concatenating.
- This means the program recognizes that
-
If the base URL is not encoded as UTF-16BE, then this function copies 1 byte at a time from the relative URL to after the base URL until
"\x00"
is reached.
- This means the program assumes that
lpString1
and lpString2
were not encoded as UTF-16BE and proceeds with URL concatenating.
- This means the program recognizes that
/*
 * custom_strcat - append lpString2 to lpString1 (this is where the bug lives).
 *
 * lpString1 - destination; per the write-up, the base URL.
 * lpString2 - source; per the write-up, the relative URL.
 *
 * Only lpString1 is tested for the 0xFE 0xFF BOM. When it is present, the
 * first two bytes of lpString2 are skipped and the rest is copied in 2-byte
 * units until a 00 00 pair has been copied; lpString2's own encoding is
 * never checked and no bound is applied. Without the BOM the call is
 * forwarded to lstrcatA().
 *
 * A NULL argument goes through the handler stored at dword_258240A4 + 4
 * with code 0x40000003 and then throws an int 0 via CxxThrowException.
 *
 * Returns lpString1.
 */
LPSTR __cdecl custom_strcat(LPSTR lpString1, LPCSTR lpString2)
{
    LPCSTR src; // edx
    CHAR *dst; // ecx
    CHAR first; // al
    CHAR second; // bl
    int thrown; // [esp+10h] [ebp-4h] BYREF

    if (!(lpString1 && lpString2)) {
        (*(void (__thiscall **)(_DWORD, int))(dword_258240A4 + 4))(*(_DWORD *)(dword_258240A4 + 4), 0x40000003);
        thrown = 0;
        CxxThrowException(&thrown, (_ThrowInfo *)&_TI1H);
    }

    if (lpString1[0] != (CHAR)0xFE || lpString1[1] != (CHAR)0xFF) {
        /* Destination has no BOM: plain byte-wise concatenation. */
        lstrcatA(lpString1, lpString2);
        return lpString1;
    }

    /* Destination has a BOM: write at its end, reading lpString2 from offset 2. */
    dst = &lpString1[(int)custom_strlen(lpString1)];
    src = lpString2 + 2;

    /* Copy one 2-byte unit per round; stop after a unit of two zero bytes. */
    do {
        first = src[0];
        dst[0] = first;
        second = src[1];
        dst[1] = second;
        src += 2;
        dst += 2;
    } while (first || second);

    return lpString1;
}
- Parameter
3. Root Cause
When a UTF-16BE encoded base URL and an ANSI encoded relative URL are concatenated, the function in IA32.api treats both URLs as UTF-16BE encoded, because only the base URL is checked for the UTF-16BE BOM.
Therefore, the process of concatenating proceeds until
"\x00\x00"
, which is not the null terminator of the ANSI relative URL, "\x00"
.
This leads to the OOB R/W and Heap BOF.
[Fig 5] The schematic diagram of the Root Cause
You can see the flow of the Root cause in this Adobe root cause.pdf file.
REFERENCE
-
THE IDA PRO BOOK 2ND EDITION (Chris Eagle)