1.申请ID:WLChara
2.个人邮箱:1848563850@qq.com
3.原创技术文章:
对某易语言程序的分析
最近对易语言程序有些感兴趣,于是找了一个程序来逆向。
废话不多说,先把exe拖进IDA,然后ctrl+e找到oep,可以发现这样一个函数:
// Process entry point (OEP) as decompiled by IDA. It records the Windows
// version, runs the runtime initialisation steps, gathers the command line
// and startup info, calls WinMain and hands its result to exit().
void __noreturn start()
{
DWORD Version; // eax
int wShowWindow; // eax
HMODULE ModuleHandleA; // eax
int wShowWindow_1; // [esp-4h] [ebp-78h]
CHAR *lpCmdLine; // [esp+10h] [ebp-64h]
int Code; // [esp+14h] [ebp-60h]
struct _STARTUPINFOA StartupInfo; // [esp+18h] [ebp-5Ch] BYREF
CPPEH_RECORD ms_exc; // [esp+5Ch] [ebp-18h]
// Split the GetVersion() result into separate globals (low byte, second
// byte, the two combined, and the high word).
Version = GetVersion();
dword_A641C0 = BYTE1(Version);
::Version = (unsigned __int8)Version;
dword_A641B8 = BYTE1(Version) + ((unsigned __int8)Version << 8);
dword_A641B4 = HIWORD(Version);
// Each of the two init steps aborts through fast_error_exit with its own
// code when it returns 0.
if ( !sub_598A57(1) )
fast_error_exit(0x1Cu);
if ( !_mtinit() )
fast_error_exit(0x10u);
ms_exc.registration.TryLevel = 0;
_ioinit();
dword_A69344 = (int)GetCommandLineA();
Str = (char *)__crtGetEnvironmentStringsA();
_setargv();
_setenvp();
_cinit();
StartupInfo.dwFlags = 0;
GetStartupInfoA(&StartupInfo);
lpCmdLine = (CHAR *)_wincmdln();
// Use the show command from STARTUPINFO when STARTF_USESHOWWINDOW is set;
// otherwise fall back to 10 (SW_SHOWDEFAULT in the Windows headers).
if ( (StartupInfo.dwFlags & STARTF_USESHOWWINDOW) != 0 )
wShowWindow = StartupInfo.wShowWindow;
else
wShowWindow = 10;
wShowWindow_1 = wShowWindow;
ModuleHandleA = GetModuleHandleA(0);
Code = WinMain(ModuleHandleA, 0, lpCmdLine, wShowWindow_1);
exit(Code);
}
不难发现,sub_598A57是CRT的堆初始化:先用HeapCreate创建堆,再根据sub_59890F的返回值选择对应的堆初始化函数(应该就是CRT里的_heap_init)。
为什么一个易语言的程序的entrypoint会有如此c的代码呢?很简单,因为易语言真正的代码运行在一个类似“vm”的东西里,entrypoint需要进行一系列行为才能执行开发者编写的易语言代码。
当时,我认为,接下来我需要分析的函数是这三者之一:_cinit WinMain sub_598A57。这三个函数中的每一个都可能藏了一些小作坊加料。于是我先对_cinit进行了分析
// Calls the optional _fpmath_0 hook when that pointer is non-null, then
// passes two ranges to _initterm: First..Last, followed by First_..Last_.
void _cinit()
{
if ( _fpmath_0 )
_fpmath_0();
_initterm(&First, &Last);
_initterm(&First_, &Last_);
}
很明显,_cinit并不是我们要找的。我们再去看CRT初始化:
// Heap setup: creates the heap stored in hHeap_0, then runs an extra
// initialiser selected by the value sub_59890F() returns.
// Returns 1 on success, 0 when HeapCreate or the selected initialiser fails
// (in the latter case the heap is destroyed again first).
int __cdecl sub_598A57(int a1)
{
int n2; // eax
int v2; // eax
// flOptions is 1 only when a1 == 0; 0x1000 is the initial size and the
// maximum size of 0 leaves the heap growable.
hHeap_0 = HeapCreate(a1 == 0, 0x1000u, 0);
if ( !hHeap_0 )
return 0;
n2 = sub_59890F();
// NOTE(review): this self-assignment has no effect as shown; presumably a
// decompiler artifact of a store to a global - confirm in the disassembly.
n2 = n2;
if ( n2 == 3 )
{
v2 = __sbh_heap_init(1016);
}
else
{
// Any value other than 2 or 3 needs no extra initialisation.
if ( n2 != 2 )
return 1;
v2 = sub_59CF3C();
}
if ( !v2 )
{
HeapDestroy(hHeap_0);
return 0;
}
return 1;
}
那么我们基本可以确定代码藏在winmain里了。点进winmain,可以发现:
// Thin wrapper: forwards all four arguments unchanged to sub_5AB33A and
// returns its result.
int __stdcall WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShowCmd)
{
return sub_5AB33A(hInstance, hPrevInstance, lpCmdLine, nShowCmd);
}
// Target of the WinMain wrapper; the article identifies it as MFC's
// AfxWinMain without symbols. Returns -1 when the initial checks fail,
// otherwise the value produced by the virtual calls on the thread object.
// AfxWinTerm() runs on every path.
int __stdcall sub_5AB33A(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShowCmd)
{
int v4; // ebx
struct CWinThread *Thread; // esi
int v6; // edi
int v7; // ecx
int v8; // eax
v4 = -1;
Thread = AfxGetThread();
// Second dword of the module state; used below as an optional object
// pointer with its own vtable.
v6 = *((_DWORD *)AfxGetModuleState() + 1);
// sub_5AFC7D must succeed and, when v6 is non-null, its virtual function at
// vtable offset 132 must return non-zero.
if ( sub_5AFC7D(hInstance, hPrevInstance, lpCmdLine, nShowCmd)
&& (!v6 || (*(int (__thiscall **)(int))(*(_DWORD *)v6 + 132))(v6)) )
{
// Virtual at vtable offset 80: non-zero selects the call at offset 84,
// zero selects the teardown path below.
if ( (*(int (__thiscall **)(struct CWinThread *))(*(_DWORD *)Thread + 80))(Thread) )
{
v8 = (*(int (__thiscall **)(struct CWinThread *))(*(_DWORD *)Thread + 84))(Thread);
}
else
{
// Dword 7 of the thread object: when set, call its virtual at offset 88
// before the thread object's virtual at offset 104.
v7 = *((_DWORD *)Thread + 7);
if ( v7 )
(*(void (__thiscall **)(int))(*(_DWORD *)v7 + 88))(v7);
v8 = (*(int (__thiscall **)(struct CWinThread *))(*(_DWORD *)Thread + 104))(Thread);
}
v4 = v8;
}
AfxWinTerm();
return v4;
}
WinMain直接跳到了sub_5AB33A,这个函数根本就是没符号的AfxWinMain!这下看懂了,AfxWinMain的大概流程是这样的:
1.获取线程对象和模块状态
Thread = AfxGetThread();
v6 = *((_DWORD *)AfxGetModuleState() + 1);
2.做AfxWinInit/WinInit初始化,然后初始化App
if ( sub_5AFC7D(hInstance, hPrevInstance, lpCmdLine, nShowCmd)
&& (!v6 || v6->vfunc132()) )
{
//CatCodes...
}
3.如果线程初始化成功则运行程序(vfunc84,从后面的虚表可以看出是 CWinApp::Run() ),否则就ExitInstance
if ( Thread->vfunc80() ) // CWinThread::InitInstance() ?
{
v8 = Thread->vfunc84(); // CWinApp::Run()
}
else
{
v7 = *((_DWORD *)Thread + 7); // m_pMainWnd
if ( v7 )
v7->vfunc88(); // CWnd::PostNcDestroy / DestroyWindow
v8 = Thread->vfunc104(); // CWinThread::ExitInstance()
}
v4 = v8;
4.结束框架(这个就不贴代码了)
那么接下来,我们去找一下虚表,然后再通过虚表找到InitInstance方法,看看他具体做了什么。
在这一步我用了一个比较偷懒的方法:直接在ida里面搜CWinApp::Run,之后点击函数名,选中按x,发现CWinApp::Run有两个引用,其中第一个是CWinApp的虚表,第二个则是一块没RTTI信息的内存区域,像虚表。
内存区:
.rdata:009E8840 dd offset sub_4F8720
.rdata:009E8844 dd offset ?Run@CWinApp@@UAEHXZ ; CWinApp::Run(void)
.rdata:009E8848 dd offset sub_4FC820
.rdata:009E884C dd offset sub_5AA610
.rdata:009E8850 dd offset sub_4FAF10
.rdata:009E8854 dd offset sub_5AA1A3
.rdata:009E8858 dd offset sub_4F8FB0
.rdata:009E885C dd offset sub_5A9A56
.rdata:009E8860 dd offset sub_5AA4AB
.rdata:009E8864 dd offset ?GetMainWnd@CWinThread@@UAEPAVCWnd@@XZ ; CWinThread::GetMainWnd(void)
.rdata:009E8868 dd offset ?Delete@CWinThread@@UAEXXZ ; CWinThread::Delete(void)
.rdata:009E886C dd offset ?GetNextDocTemplate@CWinApp@@QBEPAVCDocTemplate@@AAPAU__POSITION@@@Z ; CWinApp::GetNextDocTemplate(__POSITION * &)
.rdata:009E8870 dd offset sub_5AD161
.rdata:009E8874 dd offset ?InitApplication@CWinApp@@UAEHXZ ; CWinApp::InitApplication(void)
.rdata:009E8878 dd offset sub_5AD14E
.rdata:009E887C dd offset sub_5ACECF
.rdata:009E8880 dd offset ?DoWaitCursor@CWinApp@@UAEXH@Z ; CWinApp::DoWaitCursor(int)
CWinApp::Run位于009E8844,对应AfxWinMain里的vfunc84(虚表偏移+84);那么偏移+80的那一项就是它前面的009E8840。于是我们可以知道vfunc80的真面目了:sub_4F8720。用IDA看一下它的伪代码:
// Function stored in the vtable slot directly before CWinApp::Run (the
// article's vfunc80). Calls sub_5057C0 and sub_5106E0, then returns the
// result of sub_4E4D7D.
int __thiscall sub_4F8720(HMODULE *this)
{
int v1; // eax
// this[26] and this + 54 correspond to [esi+68h] and esi+0D8h in the
// disassembly of this function.
sub_5057C0(this[26], (int)(this + 54));
// NOTE(review): the disassembly loads ecx = this + 0x384 right before this
// call, so the callee presumably also receives that as its own `this`;
// the decompiler output does not show it.
v1 = sub_5106E0(sub_401000, sub_401000, nullsub_1);
// Guess what sub_401000 is? Just `xor eax, eax`! Every E-language program has this at 0x401000, though I am not sure what it is actually used for.
// NOTE(review): the listing of sub_4E4D7D declares no parameters, so
// passing v1 is presumably a decompiler artifact.
return sub_4E4D7D(v1);
}
.text:004F8720
.text:004F8720 push ebp
.text:004F8721 mov ebp, esp
.text:004F8723 push ecx ; reserves the 4-byte slot used as var_4 below
.text:004F8724 push ebx
.text:004F8725 push esi
.text:004F8726 mov esi, ecx ; esi = this
.text:004F8728 push edi
.text:004F8729 mov ecx, [esi+68h] ; dword at this + 0x68
.text:004F872C lea eax, [esi+0D8h] ; address of this + 0xD8
.text:004F8732 push eax ; int
.text:004F8733 push ecx ; hModule
.text:004F8734 call sub_5057C0
.text:004F8739 add esp, 8 ; drop the two arguments pushed above
.text:004F873C lea ecx, [esi+384h] ; ecx = this + 0x384, presumably the callee's this pointer
.text:004F8742 push offset nullsub_1
.text:004F8747 push offset sub_401000
.text:004F874C push offset sub_401000
.text:004F8751 call sub_5106E0
.text:004F8756 pusha ; save all general-purpose registers around the next call
.text:004F8757 call sub_4E4D7D
.text:004F875C mov [ebp+var_4], eax ; stash the result so popa does not overwrite it
.text:004F875F popa
.text:004F8760 mov eax, [ebp+var_4] ; reload the stashed result as the return value
.text:004F8763 pop edi
.text:004F8764 pop esi
.text:004F8765 pop ebx
.text:004F8766 mov esp, ebp
.text:004F8768 pop ebp
.text:004F8769 retn
.text:004F8769 sub_4F8720 endp
我们可以看到程序在call sub_4E4D7D之前执行了pusha,压入了所有寄存器到堆栈,这很可疑,像虚拟机初始化,直接看看他干了什么:
.text:004E4D7D ; int sub_4E4D7D()
.text:004E4D7D sub_4E4D7D proc near ; CODE XREF: sub_4F8720+37↓p
.text:004E4D7D mov eax, 6 ; first value handed to sub_4E4E04 in eax
.text:004E4D82 call sub_4E4E04
.text:004E4D87 cld ; clear the direction flag
.text:004E4D88 fninit ; reinitialise the x87 FPU state
.text:004E4D8A call sub_4B2DE3
.text:004E4D8F push offset sub_4B4647 ; stack argument for the next sub_4E4E04 call
.text:004E4D94 mov eax, 3 ; second value handed to sub_4E4E04 in eax
.text:004E4D99 call sub_4E4E04
.text:004E4D9E add esp, 4 ; drop the pushed sub_4B4647 argument
.text:004E4DA1 call sub_401199 ; from here on: a straight run of argument-less calls
.text:004E4DA6 call loc_40111A
.text:004E4DAB call sub_4010EF
.text:004E4DB0 call sub_401044
.text:004E4DB5 call sub_401067
.text:004E4DBA call loc_4010B7
.text:004E4DBF call loc_401004
.text:004E4DC4 call loc_401093
.text:004E4DC9 call sub_4010DB
.text:004E4DCE call sub_401185
.text:004E4DD3 call loc_4011E1
.text:004E4DD8 call loc_4011BC
.text:004E4DDD call loc_40102F
.text:004E4DE2 call sub_4ADDEE
.text:004E4DE7 call loc_40113E
.text:004E4DEC call sub_401162
.text:004E4DF1 call loc_401362
.text:004E4DF6 call sub_4E4DFE
.text:004E4DFB xor eax, eax ; return 0
.text:004E4DFD retn
.text:004E4DFD sub_4E4D7D endp
很明显,这就是易语言程序的入口点了。在进入之前用pusha保存寄存器,进入后执行cld和fninit,清除方向标志DF并重新初始化FPU。这不像是C编译器生成的代码,大概率是手写的汇编入口。
其中,我注意到了这个函数:sub_4E4E04。我们可以看到,代码先让eax=6,然后调用了sub_4E4E04,之后又让eax=3,再一次调用了sub_4E4E04。手写汇编应该不会存在毫无意义的赋值,所以我猜测sub_4E4E04是把eax当作opcode(功能号)来分发的。步入后果然如此:它最终执行了“jmp funcs_503AF0[eax*4]”,funcs_503AF0则是一个存了很多函数指针的表。
.data:00A06750 funcs_503AF0 dd offset sub_4FBE70 ; DATA XREF: sub_503AF0↑r
.data:00A06754 dd offset sub_4FBE60
.data:00A06758 dd offset sub_4FBF80
.data:00A0675C dd offset sub_503F80
.data:00A06760 dd offset sub_4FBFE0
.data:00A06764 dd offset sub_4FC000
.data:00A06768 dd offset sub_503AE0
我们直接去分析case 6(sub_503AE0),可以看到:
// Entry 6 of the funcs_503AF0 table: runs sub_522210 with the global object
// at unk_A2F478 as `this` and returns its result.
int sub_503AE0()
{
return sub_522210(&unk_A2F478);
}
而sub_522210则是
// Runtime initialisation reached through sub_503AE0 with this = &unk_A2F478.
// As shown, it stores the process heap handle and thread id, splits the
// module path into directory and file name, fills two pointer tables, feeds
// the embedded data at byte_7BBB20 through a CMemFile and a CArchive, and
// finally adds the address of loc_401004 to stored values inside every
// record it walks (a relocation-style fixup).
int __thiscall sub_522210(_DWORD *this)
{
int v2; // eax
unsigned __int8 *v3; // eax
unsigned __int8 *v4; // edi
HCURSOR CursorA; // esi
HGDIOBJ StockObject; // eax
int v7; // eax
_DWORD *v8; // edi
_DWORD *v9; // edi
int i; // eax
int j; // eax
int k; // esi
int v13; // eax
void (__cdecl *v14)(int, int (__stdcall *)(int, SIZE_T, LPVOID), _DWORD); // eax
int v15; // eax
int v16; // ebx
int v17; // esi
int v18; // esi
int v19; // esi
_DWORD *v20; // esi
int v21; // esi
int v22; // eax
int v23; // esi
int v24; // eax
int v25; // ecx
_DWORD *v26; // eax
bool v27; // zf
int v29; // [esp+0h] [ebp-1C4h] BYREF
CHAR Filename[260]; // [esp+Ch] [ebp-1B8h] BYREF
_BYTE v31[68]; // [esp+110h] [ebp-B4h] BYREF
_BYTE v32[40]; // [esp+154h] [ebp-70h] BYREF
char v33[4]; // [esp+17Ch] [ebp-48h] BYREF
_DWORD v34[2]; // [esp+184h] [ebp-40h] BYREF
_DWORD v35[2]; // [esp+18Ch] [ebp-38h] BYREF
int v36; // [esp+194h] [ebp-30h]
unsigned int v37; // [esp+19Ch] [ebp-28h]
int v38; // [esp+1A0h] [ebp-24h]
void *v39; // [esp+1A4h] [ebp-20h]
unsigned int v40; // [esp+1A8h] [ebp-1Ch]
int v41; // [esp+1ACh] [ebp-18h]
_DWORD *v42; // [esp+1B0h] [ebp-14h]
int *v43; // [esp+1B4h] [ebp-10h]
int n8; // [esp+1C0h] [ebp-4h]
v43 = &v29;
v42 = this;
this[259] = GetProcessHeap();
this[49] = 3;
v2 = this[257];
this[50] = 0;
this[51] = 124871;
// One-time OLE initialisation, guarded by the flag in this[257].
if ( !v2 )
{
OleInitialize(0);
this[257] = 1;
}
// Split the module path at the last backslash (0x5C): the directory goes to
// the CString at this + 52 and the file name to the one at this + 53. With
// no backslash the directory is emptied and the whole path is the name.
GetModuleFileNameA(0, Filename, 0x104u);
v3 = _mbsrchr((const unsigned __int8 *)Filename, 0x5Cu);
v4 = v3;
if ( v3 )
{
*v3 = 0;
CString::operator=((CString *)(this + 52), Filename);
CString::operator=((CString *)(this + 53), (LPCSTR)v4 + 1);
}
else
{
CString::Empty((CString *)(this + 52));
CString::operator=((CString *)(this + 53), Filename);
}
SetCurrentDirectoryA((LPCSTR)this[52]);
sub_5106E0(this + 220, (int)sub_57B980, (int (*)(void))sub_57B990, nullsub_8);
sub_5106E0(this + 225, (int)sub_401000, sub_401000, nullsub_1);
sub_5106E0(this + 238, (int)sub_401000, sub_401000, nullsub_1);
// 0x7F00 is the IDC_ARROW resource id in the Windows headers.
CursorA = LoadCursorA(0, (LPCSTR)0x7F00);
StockObject = GetStockObject(NULL_BRUSH);
v7 = sub_505000(
aElHideowner, // "_EL_HideOwner"
0,
(int)CursorA,
(int)StockObject,
0);
sub_5A728F((struct CWnd *)(this + 242), 128, v7, (int)byte_A2339C, 0, 0, 0, 0, 0, 0, 0, 0);
this[138] = GetCurrentThreadId();
// NOTE(review): the large index into aRa2ResMpDat looks like the decompiler
// expressing an unrelated address relative to that string symbol - confirm.
this[102] = "\\RA2\\Res\\MP.dat";
this[103] = &aRa2ResMpDat[2081825];
v8 = v42;
sub_4EB650(0, 0x71Cu);
sub_4EB650(0, 0x71Cu);
v9 = v8 + 82;
sub_4EB650(0, 0x71Cu);
sub_4EB650(0, 0x71Cu);
// Copy up to 455 pointers from off_5B6804 into the buffer at v9[2], bounded
// by v9[4] >> 2.
for ( i = 0; i < 455; ++i )
{
if ( i >= 0 && i < v9[4] >> 2 )
*(_DWORD *)(v9[2] + 4 * i) = *(&off_5B6804 + i);
}
// Same pattern for off_5B6F20 into the buffer at v42[89], bounded by
// v42[91] >> 2.
for ( j = 0; j < 455; ++j )
{
if ( j >= 0 && j < v42[91] >> 2 )
*(_DWORD *)(v42[89] + 4 * j) = off_5B6F20[j];// "ShellExecuteA"
}
// For each non-null entry among the 10 slots of off_5BA784, call the
// function pointer stored at byte offset 120 of that entry (if any) with
// (1, sub_4FC8F0, 0).
for ( k = 0; k < 10; ++k )
{
v13 = (int)*(&off_5BA784 + k);
if ( v13 )
{
v14 = *(void (__cdecl **)(int, int (__stdcall *)(int, SIZE_T, LPVOID), _DWORD))(v13 + 120);
if ( v14 )
v14(1, sub_4FC8F0, 0);
}
}
sub_4EB650(&off_5BA784, 0x28u);
sub_4EB650(0, 0x28u);
sub_4EAEE0(v35);
v34[0] = &off_9E8948;
v34[1] = v42;
n8 = 0;
AFX_EXCEPTION_LINK::AFX_EXCEPTION_LINK((AFX_EXCEPTION_LINK *)v33);
LOBYTE(n8) = 2;
// Attach the 0x22AF62-byte blob at byte_7BBB20 to a CMemFile and process it
// through the archive object at v31 via sub_4F84C0.
CMemFile::CMemFile((CMemFile *)v32, 0x400u);
LOBYTE(n8) = 3;
CMemFile::Attach((CMemFile *)v32, byte_7BBB20, 0x22AF62u, 0);
sub_5AC22F(v32, 1, 4096, 0);
LOBYTE(n8) = 4;
sub_4F84C0((CArchive *)v31);
CArchive::Close((CArchive *)v31);
LOBYTE(n8) = 3;
CArchive::~CArchive((CArchive *)v31);
LOBYTE(n8) = 2;
CMemFile::~CMemFile((CMemFile *)v32);
n8 = 0;
AfxTryCleanup();
// v39 is the base value added to every stored entry in the loops below.
v39 = &loc_401004;
// NOTE(review): v36 and v37 are never assigned in this listing; presumably
// they alias fields of the object built at v35 by sub_4EAEE0 - confirm.
v15 = (v37 != 0 ? v36 : 0) + 4 * (v37 >> 3);
if ( v37 >> 3 )
{
v38 = (v37 != 0 ? v36 : 0) + 4 * (v37 >> 3);
v40 = v37 >> 3;
do
{
v16 = 0;
v41 = *(_DWORD *)(*(_DWORD *)v15 + 24);
if ( v41 > 0 )
{
do
{
// NOTE(review): sub_4F72D0 is declared __thiscall (this, index, out);
// only two arguments are shown here, so `this` is presumably missing
// from the decompiler output.
v17 = sub_4F72D0(v16, 0);
if ( sub_4F7B00(v16) == 1 )
{
v18 = sub_5648C0(v17 + 24) + v17 + 24;
v19 = v18 + sub_5648C0(v18) + 12;
v20 = (_DWORD *)(sub_5648C0(v19) + v19);
// A stored -1 becomes 0; any other value has v39 added to it.
if ( *v20 == -1 )
*v20 = 0;
else
*v20 += v39;
}
else
{
v21 = sub_5648C0(v17 + 24) + v17 + 24;
v22 = sub_5648C0(v21);
v23 = *(_DWORD *)(v21 + v22 + 28 + 4 * *(_DWORD *)(v21 + v22 + 28) + 4)
+ v21
+ v22
+ 28
+ 4 * *(_DWORD *)(v21 + v22 + 28)
+ 4
+ 4;
v24 = sub_5648C0(v23);
v25 = *(_DWORD *)(v24 + v23 + 12);
v26 = (_DWORD *)(v24 + v23 + 20);
// Same -1 / add-v39 rule applied to v25 entries spaced two dwords apart.
if ( v25 > 0 )
{
do
{
if ( *v26 == -1 )
*v26 = 0;
else
*v26 += v39;
v26 += 2;
--v25;
}
while ( v25 );
}
}
++v16;
}
while ( v16 < v41 );
}
v15 = v38 + 4;
v27 = v40 == 1;
v38 += 4;
--v40;
}
while ( !v27 );
}
sub_4F7A80(v34);
sub_4EB0E0(v35);
v34[0] = &off_9E8948;
n8 = 8;
sub_4F8400(v34);
v34[0] = &off_9E8954;
n8 = 9;
sub_4EB0E0(v35);
n8 = -1;
v35[0] = &off_9E6AC8;
return sub_4EB0E0(v35);
}
就是这里,接下来只需要对这个函数进行分析,就能找到易语言程序作者编写的代码了。分析的过程我就不继续贴了。总之,我发现函数sub_4F72D0就是那个易语言官方Runtime提供的遍历接口
// Looks up record number a2 in the table object `this`.
// Returns 0 when a2 >= this[1]; otherwise returns a pointer one dword past
// the start of the record. When a3 is non-null, the record's first dword is
// stored through it.
_DWORD *__thiscall sub_4F72D0(_DWORD *this, int a2, _DWORD *a3)
{
int v3; // edx
int v4; // eax
_DWORD *v5; // eax
v3 = this[1];
if ( a2 >= v3 )
return 0;
// The base address is this[4] only when this[6] is non-zero; otherwise 0.
if ( this[6] )
v4 = this[4];
else
v4 = 0;
// record = base + 8 * v3 + offset, where offset is the dword read from
// base + 4 * v3 + 4 * a2.
v5 = (_DWORD *)(4 * v3 + v4 + 4 * v3 + *(_DWORD *)(4 * v3 + v4 + 4 * a2));
if ( a3 )
*a3 = *v5;
return v5 + 1;
}
.text:004F72D0 ; _DWORD *__thiscall sub_4F72D0(_DWORD *this, int, _DWORD *)
.text:004F72D0 sub_4F72D0 proc near ; CODE XREF: sub_4EA480+5A↑p
.text:004F72D0 ; sub_4EA480+130↑p ...
.text:004F72D0
.text:004F72D0 arg_0 = dword ptr 4
.text:004F72D0 arg_4 = dword ptr 8
.text:004F72D0
.text:004F72D0 mov edx, [ecx+4] ; edx = this[1] (entry count)
.text:004F72D3 push esi ; preserve esi (restored before each retn)
.text:004F72D4 mov esi, [esp+4+arg_0] ; esi = index (first stack argument)
.text:004F72D8 cmp esi, edx
.text:004F72DA jge short loc_4F730E ; index >= count (signed): return 0
.text:004F72DC mov eax, [ecx+18h] ; eax = this[6]; only tested for zero below
.text:004F72DF test eax, eax
.text:004F72E1 jnz short loc_4F72E7
.text:004F72E3 xor eax, eax ; base = 0
.text:004F72E5 jmp short loc_4F72EA
.text:004F72E7 ; ---------------------------------------------------------------------------
.text:004F72E7
.text:004F72E7 loc_4F72E7: ; CODE XREF: sub_4F72D0+11↑j ; loc_has_a_base
.text:004F72E7 mov eax, [ecx+10h] ; eax = this[4] (base address)
.text:004F72EA
.text:004F72EA loc_4F72EA: ; CODE XREF: sub_4F72D0+15↑j
.text:004F72EA lea ecx, ds:0[edx*4] ; ecx = count * 4
.text:004F72F1 lea edx, [ecx+eax] ; edx = base + count * 4 (start of the per-index offsets read below)
.text:004F72F4 mov eax, [edx+esi*4] ; eax = offset stored for this index
.text:004F72F7 add eax, ecx ; eax = offset + count * 4
.text:004F72F9 mov ecx, [esp+4+arg_4] ; ecx = optional out pointer (second stack argument)
.text:004F72FD add eax, edx ; eax = base + count * 8 + offset (record start)
.text:004F72FF test ecx, ecx
.text:004F7301 jz short loc_4F7307 ; no out pointer: skip the store
.text:004F7303 mov edx, [eax] ; first dword of the record
.text:004F7305 mov [ecx], edx ; *out = that dword
.text:004F7307
.text:004F7307 loc_4F7307: ; CODE XREF: sub_4F72D0+31↑j
.text:004F7307 add eax, 4 ; return pointer just past the record's first dword
.text:004F730A pop esi
.text:004F730B retn 8
.text:004F730E ; ---------------------------------------------------------------------------
.text:004F730E
.text:004F730E loc_4F730E: ; CODE XREF: sub_4F72D0+A↑j
.text:004F730E xor eax, eax ; out-of-range index: return 0
.text:004F7310 pop esi
.text:004F7311 retn 8
.text:004F7311 sub_4F72D0 endp
.text:004F7311
通过这段代码,应该就能够dump出易语言程序里的函数了。