﻿//like strcpy, but returns a pointer to the '\0' character at the new end of s1
#ifndef _WIN64
#define strpcpy8 strpcpy8_asm
// strpcpy8: copy the NUL-terminated string s2 into s1 and return a pointer to
// the '\0' that now ends s1 (strcpy returns the start of s1 instead).
//
// Naked 32-bit x86 routine: both arguments are read from the stack and the
// plain ret leaves them there for the caller to remove.
//   edi = write cursor in s1 (saved on entry, restored before every ret)
//   ecx = read cursor in s2
//   edx = bytes most recently loaded from s2
//   eax = scratch for the zero-byte test, then the return value
// s2 is copied byte by byte until its address is a multiple of 4, then one
// aligned dword at a time; s1 is written at whatever alignment it has.
__declspec(naked) char8_t* strpcpy8(char8_t* s1, const char8_t* s2){
   __asm{
        push    edi
        mov     edi, [esp+8]            ; s1 (arguments sit 4 bytes higher after the push)
        mov     ecx, [esp+0ch]          ; s2

        test    ecx, 3                  ; s2 already on a 4-byte boundary?
        je      short scan_dword

head_bytes:                             ; copy single bytes until s2 is aligned
        mov     dl, byte ptr [ecx]
        inc     ecx
        test    dl, dl
        je      short nul_at_0          ; terminator reached before alignment
        mov     [edi], dl
        inc     edi
        test    ecx, 3
        jne     short head_bytes
        jmp     short scan_dword

store_dword:                            ; edx holds four bytes, none of them zero
        mov     [edi], edx
        add     edi, 4
        add     ecx, 4

scan_dword:
        mov     edx, [ecx]              ; next four source bytes
        ; eax = not(edx xor (edx + 7efefeffh)). A set bit under the mask
        ; 81010100h marks a position that received no carry from the addition,
        ; which happens when a byte below it is zero. A top byte of 80h trips
        ; the test as well, so the per-byte checks below make the decision.
        lea     eax, [edx+7efefeffh]
        xor     eax, edx
        not     eax
        test    eax, 81010100h
        je      short store_dword       ; definitely no zero byte in edx

        test    dl, dl
        je      short nul_at_0
        test    dh, dh
        je      short nul_at_1
        test    edx, 00ff0000h
        je      short nul_at_2
        test    edx, 0ff000000h
        jne     short store_dword       ; false alarm: all four bytes are non-zero

        ; terminator is byte 3: the whole dword, '\0' included, is stored
        mov     [edi], edx
        lea     eax, [edi+3]
        pop     edi
        ret

nul_at_2:                               ; two characters, then the terminator
        mov     [edi], dx
        mov     byte ptr [edi+2], 0
        lea     eax, [edi+2]
        pop     edi
        ret

nul_at_1:                               ; dx = one character followed by '\0'
        mov     [edi], dx
        lea     eax, [edi+1]
        pop     edi
        ret

nul_at_0:                               ; dl is the terminator itself
        mov     [edi], dl
        mov     eax, edi
        pop     edi
        ret
   }
}

/*TODO: write this in assembly, so that s1 stays in a register instead of being moved back and forth between the stack and the registers
#define FUNCTION_strbuild
char8_t* strbuild(char8_t* s1, const char8_t* t0, ...){
}*/
#else //Provide a dummy definition
// Portable C version for builds where the inline-assembly routine is not
// compiled. Copies s2, including its terminating '\0', into s1 and returns a
// pointer to the '\0' just written in s1. Previously this stub returned NULL
// without copying anything.
// As with strcpy, s1 must have room for the whole string and the two buffers
// must not overlap.
char8_t* strpcpy8_asm(char8_t* s1, const char8_t* s2){
	while((*s1 = *s2) != 0){	//store first, then test: the '\0' is copied too
		++s1;
		++s2;
	}
	return s1;	//s1 stopped on the terminator it just wrote
}
#endif
