freetype-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Git][freetype/freetype][gsoc-anurag-2023] 2 commits: Optimize SIMD, add new line drawing function


From: Anurag Thakur (@AdbhutDev)
Subject: [Git][freetype/freetype][gsoc-anurag-2023] 2 commits: Optimize SIMD, add new line drawing function
Date: Fri, 06 Oct 2023 23:35:58 +0000

Anurag Thakur pushed to branch gsoc-anurag-2023 at FreeType / FreeType

Commits:

  • f5bc9da6
    by Anurag Thakur at 2023-10-07T01:20:03+05:30
    Optimize SIMD, add new line drawing function
    
  • 895d11b1
    by Anurag Thakur at 2023-10-07T05:05:36+05:30
    Attempt to use new render line algo
    

2 changed files:

Changes:

  • src/dense/ftdense.c
    ... ... @@ -80,25 +80,35 @@ dense_move_to( const FT_Vector* to, dense_worker* worker )
    80 80
     static int
    
    81 81
     dense_line_to( const FT_Vector* to, dense_worker* worker )
    
    82 82
     {
    
    83
    -  dense_render_line( worker, UPSCALE( to->x ), UPSCALE( to->y ) );
    
    83
    +  dense_render_line( worker, worker->prev_x, worker->prev_y, UPSCALE( to->x ), UPSCALE( to->y ) );
    
    84 84
       dense_move_to( to, worker );
    
    85 85
       return 0;
    
    86 86
     }
    
    87 87
     
    
    88 88
     void
    
    89
    -dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
    
    89
    +dense_render_line( dense_worker* worker, FT_Pos fromx, FT_Pos fromy, FT_Pos tox, FT_Pos toy )
    
    90
    +{
    
    91
    +  return;
    
    92
    +}
    
    93
    +
    
    94
    +
    
    95
    +void
    
    96
    +dense_render_line2( dense_worker* worker, FT_PreLine pl )
    
    90 97
     {
    
    91 98
       //printf("Line from %d, %d to %d, %d\n", worker->prev_x, worker->prev_y, tox, toy);
    
    92 99
     
    
    93
    -  FT26D6 fx = worker->prev_x>>2;
    
    94
    -  FT26D6 fy = worker->prev_y>>2;
    
    100
    +  // FT26D6 fx = worker->prev_x>>2;
    
    101
    +  // FT26D6 fy = worker->prev_y>>2;
    
    102
    +
    
    103
    +  FT26D6 fx = UPSCALE(pl->x1)>>2;
    
    104
    +  FT26D6 fy = UPSCALE(pl->y1)>>2;
    
    95 105
     
    
    96 106
       FT26D6 from_x = fx;
    
    97 107
       FT26D6 from_y = fy;
    
    98 108
     
    
    99 109
     
    
    100
    -  FT26D6 tx = tox>>2;
    
    101
    -  FT26D6 ty = toy>>2;
    
    110
    +  FT26D6 tx = UPSCALE(pl->x2)>>2;
    
    111
    +  FT26D6 ty = UPSCALE(pl->y2)>>2;
    
    102 112
     
    
    103 113
       if ( fy == ty )
    
    104 114
         return;
    
    ... ... @@ -164,7 +174,7 @@ dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
    164 174
     
    
    165 175
         }
    
    166 176
       }
    
    167
    -  else
    
    177
    +  else if(0)
    
    168 178
       {
    
    169 179
         int    x       = from_x;
    
    170 180
         int    y0      = from_y>>6;
    
    ... ... @@ -202,8 +212,9 @@ dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
    202 212
           if ( x1i <= x0i + 1 )
    
    203 213
           {
    
    204 214
             FT26D6 xmf = ( ( x + xnext )>>1) - x0floor;
    
    205
    -        m_a[linestart + x0i] += d * ((1<<6) - xmf);
    
    206
    -        m_a[linestart + ( x0i + 1 )] += d * xmf;
    
    215
    +        FT20D12 dxmf = d*xmf;
    
    216
    +        m_a[linestart + x0i] += (d * 64) - dxmf;
    
    217
    +        m_a[linestart + ( x0i + 1 )] += dxmf;
    
    207 218
           }
    
    208 219
           else
    
    209 220
           {
    
    ... ... @@ -242,6 +253,85 @@ dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
    242 253
           x = xnext;
    
    243 254
         }
    
    244 255
       }
    
    256
    +  else{
    
    257
    +        FT20D12* m_a     = worker->m_a;
    
    258
    +
    
    259
    +    float    x0       = from_x/64.0;
    
    260
    +    float    y0       = from_y/64.0;
    
    261
    +    float    x1       = to_x/64.0;
    
    262
    +    float    y1       = to_y/64.0;
    
    263
    +
    
    264
    +    float    start_x       = truncf(x0);
    
    265
    +    float    start_y       = truncf(y0);
    
    266
    +    float    end_x       = truncf(x1);
    
    267
    +    float    end_y       = truncf(y1);
    
    268
    +
    
    269
    +    float dx = end_x - start_x;
    
    270
    +    float dy = end_y - start_y;
    
    271
    +
    
    272
    +    float tdx = dx == 0 ? 999999 : 1.0/dx;
    
    273
    +    float tdy = 1.0/dy;
    
    274
    +
    
    275
    +    int target_x = start_x + 1.0;
    
    276
    +    int target_y = start_y + 1.0;
    
    277
    +
    
    278
    +    float sx = 1.0 * ((tdx>0)?1:-1);
    
    279
    +    float sy = 1.0 * ((tdy>0)?1:-1);
    
    280
    +
    
    281
    +    float tmx = tdx*((target_x - x0));
    
    282
    +    float tmy = tdy*((target_y - y0));
    
    283
    +
    
    284
    +    tdx = fabs(tdx);
    
    285
    +    tdy = fabs(tdy);
    
    286
    +
    
    287
    +    float x_prev = x0;
    
    288
    +    float y_prev = y0;
    
    289
    +
    
    290
    +    int index = start_x + start_y*worker->m_w;
    
    291
    +    int index_x_inc = (int)sx;
    
    292
    +    int index_y_inc = worker->m_w * ((sy>0)?1:-1);
    
    293
    +
    
    294
    +    int dist = (abs(start_x - end_x) + abs(start_y-end_y));
    
    295
    +
    
    296
    +    while (dist > 0)
    
    297
    +    {
    
    298
    +      dist--;
    
    299
    +      int prev_index = index;
    
    300
    +      float y_next, x_next;
    
    301
    +      if (tmx<tmy)
    
    302
    +      {
    
    303
    +        y_next = tmx * dy + y0;
    
    304
    +        x_next = target_x;
    
    305
    +        tmx += tdx;
    
    306
    +        target_x += sx;
    
    307
    +        index += index_x_inc;
    
    308
    +      }else{
    
    309
    +        y_next = target_y;
    
    310
    +        x_next = tmy*dx + x0;
    
    311
    +        tmy += tdy;
    
    312
    +        target_y += sy;
    
    313
    +        index += index_y_inc;
    
    314
    +
    
    315
    +      }
    
    316
    +      if(prev_index>0 && prev_index<(worker->m_h * worker->m_w)){
    
    317
    +      m_a[prev_index] += (y_prev-y_next) - ((y_prev-y_next)*((x_prev+x_next)/2));
    
    318
    +      m_a[prev_index +1] += ((y_prev-y_next) *((x_prev+x_next)/2));
    
    319
    +      }
    
    320
    +
    
    321
    +      x_prev = x_next;
    
    322
    +      y_prev = y_next;
    
    323
    +
    
    324
    +    }
    
    325
    +    m_a[(int)(end_x+end_y* worker->m_w)] += (y_prev-y1) - ((y_prev-y1)*((x_prev+x1)/2));
    
    326
    +    m_a[(int)(end_x+end_y * worker->m_w)+1] += (y_prev-y1)*((x_prev+x1)/2);
    
    327
    +
    
    328
    +
    
    329
    +  }
    
    330
    +
    
    331
    +
    
    332
    +
    
    333
    +  
    
    334
    + 
    
    245 335
     }
    
    246 336
     
    
    247 337
     
    
    ... ... @@ -342,7 +432,7 @@ dense_render_cubic( dense_worker* worker,
    342 432
     
    
    343 433
       if ( devsq < 0.333f )
    
    344 434
       {
    
    345
    -    dense_render_line( worker, aP3.x, aP3.y );
    
    435
    +    dense_render_line( worker, worker->prev_x, worker->prev_y, aP3.x, aP3.y );
    
    346 436
         return;
    
    347 437
       }
    
    348 438
     
    
    ... ... @@ -357,7 +447,7 @@ dense_render_cubic( dense_worker* worker,
    357 447
         FT_Vector a    = Lerp( t, Lerp( t, aP0, aP1 ), Lerp( t, aP1, aP2 ) );
    
    358 448
         FT_Vector b    = Lerp( t, Lerp( t, aP1, aP2 ), Lerp( t, aP2, aP3 ) );
    
    359 449
         FT_Vector next = Lerp( t, a, b );
    
    360
    -    dense_render_line( worker, next.x, next.y );
    
    450
    +    dense_render_line( worker, worker->prev_x, worker->prev_y, next.x, next.y );
    
    361 451
         worker->prev_x = next.x;
    
    362 452
         worker->prev_y = next.y;
    
    363 453
         p              = next;
    
    ... ... @@ -423,22 +513,17 @@ dense_render_glyph( dense_worker* worker, const FT_Bitmap* target, FT_PreLine pl
    423 513
     {
    
    424 514
      // FT_Error error = FT_Outline_Decompose( &( worker->outline ),
    
    425 515
      //                                        &dense_decompose_funcs, worker );
    
    426
    -  FT_Vector point1 = {pl->x1, pl->y1};
    
    427
    -  FT_Vector point2 = {100, 100};
    
    516
    +  // FT_Vector point1 = {pl->x1, pl->y1};
    
    428 517
     
    
    429
    -  FT_Error error = dense_move_to(&point1, worker);
    
    518
    +  FT_Error error = 0;
    
    430 519
       while (pl!=NULL)
    
    431 520
       {
    
    432
    -    point1.x = pl->x1;
    
    433
    -    point1.y = pl->y1;
    
    434
    -    point2.x = pl->x2;
    
    435
    -    point2.y = pl->y2;
    
    521
    +    dense_render_line2(worker, pl);
    
    436 522
     
    
    437
    -    if(pl->ismove){
    
    438
    -      dense_move_to(&point2, worker);
    
    439
    -    }else{
    
    440
    -    dense_line_to(&point2, worker);
    
    441
    -    }
    
    523
    +
    
    524
    +    // worker->prev_x = UPSCALE(pl->x2);
    
    525
    +    // worker->prev_y = UPSCALE(pl->y2);
    
    526
    +    //dense_line_to(&point2, worker);
    
    442 527
         pl= pl->next;
    
    443 528
       }
    
    444 529
       // point.x = 100;
    
    ... ... @@ -493,7 +578,8 @@ __m128i offset = _mm_setzero_si128();
    493 578
     
    
    494 579
         // cap max value to 1
    
    495 580
         //y = _mm_min_epi32( _mm_srli_epi32( y, 4 ), _mm_set1_epi32( 255 ) );
    
    496
    -    __m128i y = _mm_abs_epi32(_mm_srai_epi32(  x , 4 ));
    
    581
    +    //__m128i y = _mm_abs_epi32(_mm_srai_epi32(  x , 4 ));
    
    582
    +    __m128i y = _mm_srli_epi32( _mm_abs_epi32( x) , 4 );
    
    497 583
     
    
    498 584
         // reduce to 255
    
    499 585
         // y = 
    
    ... ... @@ -505,7 +591,8 @@ __m128i offset = _mm_setzero_si128();
    505 591
         //__m128i z = _mm_packus_epi16(_mm_packs_epi32(z, nzero), nzero);
    
    506 592
     
    
    507 593
         // int* ptr = (int*)&dest[i];
    
    508
    -    *(int*)&dest[i] =  *(int*)&y;
    
    594
    +    _mm_storeu_si32(&dest[i], y);
    
    595
    +    //*(int*)&dest[i] =  *(int*)&y;
    
    509 596
         //*(int*)&dest[i] =  _mm_extract_epi32(y, 0);
    
    510 597
     
    
    511 598
         //_mm_store_ss( (float*)&dest[i], _mm_castsi128_ps(y) );
    
    ... ... @@ -527,7 +614,7 @@ __m128i offset = _mm_setzero_si128();
    527 614
         value += *source++;
    
    528 615
     
    
    529 616
         if(value > 0){
    
    530
    -      int n = value >>4;
    
    617
    +      int n = value >>4;_Pos fromx, FT_Pos fromy, FT_Pos tox, FT_Pos toy
    
    531 618
     
    
    532 619
           if(n>255)n=255;
    
    533 620
           *dest = (unsigned char)n;
    
    ... ... @@ -581,10 +668,10 @@ dense_raster_render( FT_Raster raster, const FT_Raster_Params* params )
    581 668
     
    
    582 669
       int size = (worker->m_w * worker->m_h + 3) & ~3;
    
    583 670
     
    
    584
    -  worker->m_a      = malloc( sizeof( FT20D12 ) * size );
    
    671
    +  worker->m_a      = calloc( size, sizeof( FT20D12 ));
    
    585 672
       worker->m_a_size = size;
    
    586 673
     
    
    587
    -  memset( worker->m_a, 0, ( sizeof( FT20D12 ) * size ) );
    
    674
    +  //memset( worker->m_a, 0, ( sizeof( FT20D12 ) * size ) );
    
    588 675
       /* exit if nothing to do */
    
    589 676
       if ( worker->m_w <= worker->m_origin_x || worker->m_h <= worker->m_origin_y )
    
    590 677
       {
    

  • src/dense/ftdense.h
    ... ... @@ -43,7 +43,7 @@ extern "C"
    43 43
         FT_Outline outline;
    
    44 44
       } dense_worker;
    
    45 45
     
    
    46
    -  void dense_render_line( dense_worker* worker, FT_Pos to_x, FT_Pos to_y );
    
    46
    +  void dense_render_line( dense_worker* worker, FT_Pos from_x, FT_Pos from_y, FT_Pos to_x, FT_Pos to_y );
    
    47 47
       void dense_render_quadratic( dense_worker* worker,
    
    48 48
                                    FT_Vector* control,
    
    49 49
                                    FT_Vector* to );
    


  • reply via email to

    [Prev in Thread] Current Thread [Next in Thread]