freetype-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Git][freetype/freetype][gsoc-anurag-2023-final] 16 commits: [dense] Migrate line drawing and accumulation to fixed-point


From: Anurag Thakur (@AdbhutDev)
Subject: [Git][freetype/freetype][gsoc-anurag-2023-final] 16 commits: [dense] Migrate line drawing and accumulation to fixed-point
Date: Mon, 09 Oct 2023 21:36:41 +0000

Anurag Thakur pushed to branch gsoc-anurag-2023-final at FreeType / FreeType

Commits:

  • 3e56c9bf
    by Anurag Thakur at 2023-10-10T01:46:37+05:30
    [dense] Migrate line drawing and accumulation to fixed-point
    
    * src/dense/ftdense.h: (FT26D6, FT20D12): New typedefs
    
    * src/dense/ftdense.c: dense_render_line, dense_render_glyph now
    use fixed-point numbers for calculation
    
    Disabled SIMD for now
    
  • bca7bda1
    by Anurag Thakur at 2023-10-10T01:46:43+05:30
    [dense] Re-enable SIMD to work with fixed-point
    
    * src/dense/ftdense.c: Use integer SIMD functions for accumulation
    
    * src/dense/ftdense.h: Change types of FT26D6, FT20D12 to better fit
    their usage
    
  • 668bc29f
    by Anurag Thakur at 2023-10-10T01:46:43+05:30
    [dense] Add optimization for vertical lines
    
    * src/dense/ftdense.c: Optimize line drawing when a vertical line is encountered
    
  • 724e81ef
    by Anurag Thakur at 2023-10-10T01:46:43+05:30
    [dense] Add optimization for division
    
    * src/dense/ftdense.c: FT_UDIV, FT_UDIVPREP macros taken from smooth
    rasterizer, help optimize fixed-point division
    
  • d4864581
    by Anurag Thakur at 2023-10-10T01:46:43+05:30
    [dense] Add -msse4.1 to compile with CMake
    
  • f7015177
    by Anurag Thakur at 2023-10-10T01:46:43+05:30
    [dense] Add compilation flags for meson
    
  • a4dcdee6
    by Anurag Thakur at 2023-10-10T02:49:59+05:30
    [dense] Add ARM NEON support and improve SSE perf
    
  • 849f2b21
    by Anurag Thakur at 2023-10-10T02:50:03+05:30
    [dense] Add FT_New_Face2
    
  • ed911fa8
    by Anurag Thakur at 2023-10-10T02:50:03+05:30
    [dense] Add FT_PreLine struct
    
  • b79951a2
    by Anurag Thakur at 2023-10-10T02:50:03+05:30
    [dense] Modified FT_FaceRec, FT_GlyphSlotRec and FT_Raster_Params
    
  • e183ffb9
    by Anurag Thakur at 2023-10-10T02:50:03+05:30
    [dense] Add FT_Refresh_Glyph
    
  • 29a32c81
    by Anurag Thakur at 2023-10-10T02:50:03+05:30
    [dense] Add #defines to ftobjs.c
    
  • 8d89a20a
    by Anurag Thakur at 2023-10-10T02:50:03+05:30
    [dense] Implement FT_New_Face2 and fix glyph loading
    
  • bff4c234
    by Anurag Thakur at 2023-10-10T02:50:03+05:30
    [dense] Add code for curve flattening at load time
    
  • 70313bde
    by Anurag Thakur at 2023-10-10T02:50:03+05:30
    [dense] Add support for preloading in ft_open_face_internal
    
  • 47ae1bfa
    by Anurag Thakur at 2023-10-10T03:05:01+05:30
    [dense] Add support for rendering prelines
    

10 changed files:

Changes:

  • CMakeLists.txt
    ... ... @@ -247,6 +247,8 @@ if (BUILD_FRAMEWORK)
    247 247
     endif ()
    
    248 248
     
    
    249 249
     
    
    250
    +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1")
    
    251
    +
    
    250 252
     # Find dependencies
    
    251 253
     include(FindPkgConfig)
    
    252 254
     
    

  • builds/meson/parse_modules_cfg.py
    ... ... @@ -87,6 +87,7 @@ def generate_ftmodule(lists):
    87 87
             names = {
    
    88 88
                 "raster": ("ft_raster1",),
    
    89 89
                 "smooth": ("ft_smooth",),
    
    90
    +            "dense": ("ft_dense",),
    
    90 91
                 "svg": ("ft_svg",),
    
    91 92
                 "sdf": ("ft_sdf", "ft_bitmap_sdf"),
    
    92 93
             }.get(module)
    

  • include/freetype/freetype.h
    ... ... @@ -1276,6 +1276,7 @@ FT_BEGIN_HEADER
    1276 1276
         FT_ListRec        sizes_list;
    
    1277 1277
     
    
    1278 1278
         FT_Generic        autohint;   /* face-specific auto-hinter data */
    
    1279
    +    FT_GlyphSlot*      glyph_array;
    
    1279 1280
         void*             extensions; /* unused                         */
    
    1280 1281
     
    
    1281 1282
         FT_Face_Internal  internal;
    
    ... ... @@ -1283,6 +1284,44 @@ FT_BEGIN_HEADER
    1283 1284
       } FT_FaceRec;
    
    1284 1285
     
    
    1285 1286
     
    
    1287
    +
    
    1288
    +  /**************************************************************************
    
    1289
    +   *
    
    1290
    +   * @type:
    
    1291
    +   *   FT_PreLine
    
    1292
    +   *
    
    1293
    +   * @description:
    
    1294
    +   *   A handle to FT_PreLineRec_ containing coordinates of start and end
    
    1295
    +   *   points for a line.
    
    1296
    +   *
    
    1297
    +   */
    
    1298
    +  typedef struct FT_PreLineRec_* FT_PreLine;
    
    1299
    +
    
    1300
    +  /**************************************************************************
    
    1301
    +   *
    
    1302
    +   * @struct:
    
    1303
    +   *   FT_PreLineRec
    
    1304
    +   *
    
    1305
    +   * @description:
    
    1306
    +   *   Linked list containing the lines to be drawn for a glyph.
    
    1307
    +   *
    
    1308
    +   * @fields:
    
    1309
    +   *   x1, y1 ::
    
    1310
    +   *     Coordinates of line start point.
    
    1311
    +   *
    
    1312
    +   *   x2, y2 ::
    
    1313
    +   *     Coordinates of line end point.
    
    1314
    +   *
    
    1315
    +   *   next ::
    
    1316
    +   *     The next PreLine for the current glyph.
    
    1317
    +   *
    
    1318
    +   */
    
    1319
    +  typedef struct FT_PreLineRec_
    
    1320
    +  {
    
    1321
    +    int x1, x2, y1, y2;
    
    1322
    +    FT_PreLine next;
    
    1323
    +  } FT_PreLineRec;
    
    1324
    +
    
    1286 1325
       /**************************************************************************
    
    1287 1326
        *
    
    1288 1327
        * @enum:
    
    ... ... @@ -2171,6 +2210,12 @@ FT_BEGIN_HEADER
    2171 2210
        *   other ::
    
    2172 2211
        *     Reserved.
    
    2173 2212
        *
    
    2213
    +   *   prelines ::
    
    2214
    +   *     Linked list containing the lines to be drawn for the glyph.
    
    2215
    +   *
    
    2216
    +   *   prel_shifted ::
    
    2217
    +   *     Whether the points in `prelines` have been adjusted to the target bitmap.
    
    2218
    +   *
    
    2174 2219
        *   lsb_delta ::
    
    2175 2220
        *     The difference between hinted and unhinted left side bearing while
    
    2176 2221
        *     auto-hinting is active.  Zero otherwise.
    
    ... ... @@ -2288,6 +2333,8 @@ FT_BEGIN_HEADER
    2288 2333
         FT_Pos            rsb_delta;
    
    2289 2334
     
    
    2290 2335
         void*             other;
    
    2336
    +    FT_PreLine        prelines;
    
    2337
    +    int               prel_shifted;
    
    2291 2338
     
    
    2292 2339
         FT_Slot_Internal  internal;
    
    2293 2340
     
    
    ... ... @@ -2487,6 +2534,10 @@ FT_BEGIN_HEADER
    2487 2534
        *   params ::
    
    2488 2535
        *     Extra parameters passed to the font driver when opening a new face.
    
    2489 2536
        *
    
    2537
    +   *   size ::
    
    2538
    +   *     Size at which the glyphs will be rendered.  Use the same value as for
    
    2539
    +   *     @FT_Set_Pixel_Sizes
    
    2540
    +   *
    
    2490 2541
        * @note:
    
    2491 2542
        *   The stream type is determined by the contents of `flags`:
    
    2492 2543
        *
    
    ... ... @@ -2524,6 +2575,7 @@ FT_BEGIN_HEADER
    2524 2575
         FT_Module       driver;
    
    2525 2576
         FT_Int          num_params;
    
    2526 2577
         FT_Parameter*   params;
    
    2578
    +    FT_UInt         size;
    
    2527 2579
     
    
    2528 2580
       } FT_Open_Args;
    
    2529 2581
     
    
    ... ... @@ -2573,6 +2625,54 @@ FT_BEGIN_HEADER
    2573 2625
                    FT_Face     *aface );
    
    2574 2626
     
    
    2575 2627
     
    
    2628
    +  /**************************************************************************
    
    2629
    +   *
    
    2630
    +   * @function:
    
    2631
    +   *   FT_New_Face2
    
    2632
    +   *
    
    2633
    +   * @description:
    
    2634
    +   *   Open a font by its pathname, like @FT_New_Face, with a given glyph rendering size.
    
    2635
    +   *
    
    2636
    +   * @inout:
    
    2637
    +   *   library ::
    
    2638
    +   *     A handle to the library resource.
    
    2639
    +   *
    
    2640
    +   * @input:
    
    2641
    +   *   pathname ::
    
    2642
    +   *     A path to the font file.
    
    2643
    +   *
    
    2644
    +   *   face_index ::
    
    2645
    +   *     See @FT_Open_Face for a detailed description of this parameter.
    
    2646
    +   *
    
    2647
    +   *   size ::
    
    2648
    +   *     Size at which glyphs will be rendered.  Use the same value as for @FT_Set_Pixel_Sizes.
    
    2649
    +   *
    
    2650
    +   * @output:
    
    2651
    +   *   aface ::
    
    2652
    +   *     A handle to a new face object.  If `face_index` is greater than or
    
    2653
    +   *     equal to zero, it must be non-`NULL`.
    
    2654
    +   *
    
    2655
    +   * @return:
    
    2656
    +   *   FreeType error code.  0~means success.
    
    2657
    +   *
    
    2658
    +   * @note:
    
    2659
    +   *   The `pathname` string should be recognizable as such by a standard
    
    2660
    +   *   `fopen` call on your system; in particular, this means that `pathname`
    
    2661
    +   *   must not contain null bytes.  If that is not sufficient to address all
    
    2662
    +   *   file name possibilities (for example, to handle wide character file
    
    2663
    +   *   names on Windows in UTF-16 encoding) you might use @FT_Open_Face to
    
    2664
    +   *   pass a memory array or a stream object instead.
    
    2665
    +   *
    
    2666
    +   *   Use @FT_Done_Face to destroy the created @FT_Face object (along with
    
    2667
    +   *   its slot and sizes).
    
    2668
    +   */
    
    2669
    +  FT_EXPORT( FT_Error )
    
    2670
    +  FT_New_Face2( FT_Library   library,
    
    2671
    +               const char*  filepathname,
    
    2672
    +               FT_Long      face_index,
    
    2673
    +               FT_Face     *aface,
    
    2674
    +               FT_UInt      size);
    
    2675
    +
    
    2576 2676
       /**************************************************************************
    
    2577 2677
        *
    
    2578 2678
        * @function:
    
    ... ... @@ -3228,6 +3328,31 @@ FT_BEGIN_HEADER
    3228 3328
                      FT_UInt   glyph_index,
    
    3229 3329
                      FT_Int32  load_flags );
    
    3230 3330
     
    
    3331
    +  /**************************************************************************
    
    3332
    +   *
    
    3333
    +   * @function:
    
    3334
    +   *   FT_Refresh_Glyph
    
    3335
    +   *
    
    3336
    +   * @description:
    
    3337
    +   *   Prepare the glyph at glyph_index for rendering. Resets the glyph
    
    3338
    +   *   if it has already been rendered.
    
    3339
    +   *
    
    3340
    +   * @inout:
    
    3341
    +   *   face ::
    
    3342
    +   *     A handle to the target face object where the glyph is loaded.
    
    3343
    +   *
    
    3344
    +   * @input:
    
    3345
    +   *   glyph_index ::
    
    3346
    +   *     The index of the glyph in the font file.
    
    3347
    +   *
    
    3348
    +   * @return:
    
    3349
    +   *   FreeType error code.  0~means success.
    
    3350
    +   *
    
    3351
    +   */
    
    3352
    +  FT_EXPORT( FT_Error )
    
    3353
    +  FT_Refresh_Glyph( FT_Face   face,
    
    3354
    +                    FT_UInt   glyph_index);
    
    3355
    +
    
    3231 3356
     
    
    3232 3357
       /**************************************************************************
    
    3233 3358
        *
    

  • include/freetype/ftimage.h
    ... ... @@ -1030,6 +1030,9 @@ FT_BEGIN_HEADER
    1030 1030
        *     An optional span clipping box expressed in _integer_ pixels
    
    1031 1031
        *     (not in 26.6 fixed-point units).
    
    1032 1032
        *
    
    1033
    +   *   prelines ::
    
    1034
    +   *     Pointer of type FT_PreLine, containing line data for a glyph
    
    1035
    +   *
    
    1033 1036
        * @note:
    
    1034 1037
        *   The @FT_RASTER_FLAG_AA bit flag must be set in the `flags` to
    
    1035 1038
        *   generate an anti-aliased glyph bitmap, otherwise a monochrome bitmap
    
    ... ... @@ -1059,6 +1062,7 @@ FT_BEGIN_HEADER
    1059 1062
         FT_Raster_BitSet_Func   bit_set;      /* unused */
    
    1060 1063
         void*                   user;
    
    1061 1064
         FT_BBox                 clip_box;
    
    1065
    +    void*                   prelines;
    
    1062 1066
     
    
    1063 1067
       } FT_Raster_Params;
    
    1064 1068
     
    

  • meson.build
    ... ... @@ -395,6 +395,13 @@ if use_unix_ftsystem_c
    395 395
     endif
    
    396 396
     
    
    397 397
     
    
    398
    +if cc.get_id() == 'msvc'
    
    399
    +  ft2_defines += ['/arch:AVX']
    
    400
    +else
    
    401
    +  ft2_defines += ['-msse4.1']
    
    402
    +endif
    
    403
    +
    
    404
    +
    
    398 405
     ft2_lib = library('freetype',
    
    399 406
       sources: ft2_sources + [ftmodule_h],
    
    400 407
       c_args: ft2_defines,
    
    ... ... @@ -403,7 +410,7 @@ ft2_lib = library('freetype',
    403 410
       dependencies: ft2_deps,
    
    404 411
       install: true,
    
    405 412
       version: ft2_so_version,
    
    406
    -  link_args: common_ldflags,
    
    413
    +  link_args: common_ldflags + ['-lm'],
    
    407 414
     )
    
    408 415
     
    
    409 416
     
    

  • src/base/ftobjs.c
    ... ... @@ -42,12 +42,18 @@
    42 42
     #include <freetype/internal/services/svkern.h>
    
    43 43
     #include <freetype/internal/services/svtteng.h>
    
    44 44
     
    
    45
    +#include <math.h>
    
    45 46
     #include <freetype/ftdriver.h>
    
    46 47
     
    
    47 48
     #ifdef FT_CONFIG_OPTION_MAC_FONTS
    
    48 49
     #include "ftbase.h"
    
    49 50
     #endif
    
    50 51
     
    
    52
    +#define PIXEL_BITS 8
    
    53
    +
    
    54
    +#define ONE_PIXEL  ( 1 << PIXEL_BITS )
    
    55
    +#define UPSCALE( x )   ( ( x ) * ( ONE_PIXEL >> 6 ) )
    
    56
    +#define DOWNSCALE( x ) ( ( x ) >> ( PIXEL_BITS - 6 ) )
    
    51 57
     
    
    52 58
     #ifdef FT_DEBUG_LEVEL_TRACE
    
    53 59
     
    
    ... ... @@ -893,6 +899,14 @@
    893 899
     
    
    894 900
     
    
    895 901
       /* documentation is in freetype.h */
    
    902
    +  FT_EXPORT_DEF( FT_Error )
    
    903
    +  FT_Refresh_Glyph( FT_Face   face,
    
    904
    +                 FT_UInt   glyph_index)
    
    905
    +
    
    906
    +  {
    
    907
    +    ft_glyphslot_free_bitmap( face->glyph_array[glyph_index] );
    
    908
    +    face->glyph_array[glyph_index]->format = FT_GLYPH_FORMAT_OUTLINE;
    
    909
    +  }
    
    896 910
     
    
    897 911
       FT_EXPORT_DEF( FT_Error )
    
    898 912
       FT_Load_Glyph( FT_Face   face,
    
    ... ... @@ -914,7 +928,7 @@
    914 928
         /* The validity test for `glyph_index' is performed by the */
    
    915 929
         /* font drivers.                                           */
    
    916 930
     
    
    917
    -    slot = face->glyph;
    
    931
    +    slot = face->glyph_array[glyph_index];
    
    918 932
         ft_glyphslot_clear( slot );
    
    919 933
     
    
    920 934
         driver  = face->driver;
    
    ... ... @@ -1616,12 +1630,34 @@
    1616 1630
           return FT_THROW( Invalid_Argument );
    
    1617 1631
     
    
    1618 1632
         args.flags    = FT_OPEN_PATHNAME;
    
    1633
    +    args.size     = 0;
    
    1619 1634
         args.pathname = (char*)pathname;
    
    1620 1635
         args.stream   = NULL;
    
    1621 1636
     
    
    1622 1637
         return ft_open_face_internal( library, &args, face_index, aface, 1 );
    
    1623 1638
       }
    
    1624 1639
     
    
    1640
    +  FT_EXPORT_DEF( FT_Error )
    
    1641
    +  FT_New_Face2( FT_Library   library,
    
    1642
    +                const char*  pathname,
    
    1643
    +                FT_Long      face_index,
    
    1644
    +                FT_Face     *aface,
    
    1645
    +                FT_UInt      size)
    
    1646
    +  {
    
    1647
    +    FT_Open_Args  args;
    
    1648
    +
    
    1649
    +     /* test for valid `library' and `aface' delayed to `FT_Open_Face' */
    
    1650
    +     if ( !pathname )
    
    1651
    +       return FT_THROW( Invalid_Argument );
    
    1652
    +
    
    1653
    +     args.flags    = FT_OPEN_PATHNAME;
    
    1654
    +     args.size     = size;
    
    1655
    +     args.pathname = (char*)pathname;
    
    1656
    +     args.stream   = NULL;
    
    1657
    +
    
    1658
    +     return ft_open_face_internal( library, &args, face_index, aface, 1 );
    
    1659
    +  }
    
    1660
    +
    
    1625 1661
     #endif
    
    1626 1662
     
    
    1627 1663
     
    
    ... ... @@ -2519,6 +2555,306 @@
    2519 2555
       }
    
    2520 2556
     
    
    2521 2557
     
    
    2558
    +static FT_Vector
    
    2559
    +Lerp( float T, FT_Vector P0, FT_Vector P1 )
    
    2560
    +{
    
    2561
    +  FT_Vector p;
    
    2562
    +  p.x = P0.x + T * ( P1.x - P0.x );
    
    2563
    +  p.y = P0.y + T * ( P1.y - P0.y );
    
    2564
    +  return p;
    
    2565
    +}
    
    2566
    +
    
    2567
    +int conic_to2(FT_GlyphSlot* slot, FT_Vector *control, FT_Vector *from, FT_Vector *to, FT_PreLine *ptr)
    
    2568
    +{
    
    2569
    +  /*
    
    2570
    +  Calculate devsq as the square of four times the
    
    2571
    +  distance from the control point to the midpoint of the curve.
    
    2572
    +  This is the place at which the curve is furthest from the
    
    2573
    +  line joining the control points.
    
    2574
    +
    
    2575
    +  4 x point on curve = p0 + 2p1 + p2
    
    2576
    +  4 x midpoint = 4p1
    
    2577
    +
    
    2578
    +  The division by four is omitted to save time.
    
    2579
    +  */
    
    2580
    +  FT_Vector aP0 = { from->x , from->y};
    
    2581
    +  FT_Vector aP1 = { control->x, control->y };
    
    2582
    +  FT_Vector aP2 = { to->x, to->y };
    
    2583
    +
    
    2584
    +  float devx  = aP0.x - aP1.x - aP1.x + aP2.x;
    
    2585
    +  float devy  = aP0.y - aP1.y - aP1.y + aP2.y;
    
    2586
    +  float devsq = devx * devx + devy * devy;
    
    2587
    +
    
    2588
    +  if ( devsq < 0.333f )
    
    2589
    +  {
    
    2590
    +    FT_PreLine pl3       = malloc(sizeof(FT_PreLineRec));
    
    2591
    +            pl3->x1      = (*ptr)->x2;
    
    2592
    +            pl3->y1      = (*ptr)->y2;
    
    2593
    +            pl3->x2      = aP2.x;
    
    2594
    +            pl3->y2      = aP2.y;
    
    2595
    +            pl3->next    = NULL;
    
    2596
    +            (*ptr)->next = pl3;
    
    2597
    +            *ptr         = (*ptr)->next;
    
    2598
    +    return 0;
    
    2599
    +  }
    
    2600
    +
    
    2601
    +  /*
    
    2602
    +  According to Raph Levien, the reason for the subdivision by n (instead of
    
    2603
    +  recursive division by the Casteljau system) is that "I expect the flatness
    
    2604
    +  computation to be semi-expensive (it's done once rather than on each potential
    
    2605
    +  subdivision) and also because you'll often get fewer subdivisions. Taking a
    
    2606
    +  circular arc as a simplifying assumption, where I get n, a recursive approach
    
    2607
    +  would get 2^ceil(lg n), which, if I haven't made any horrible mistakes, is
    
    2608
    +  expected to be 33% more in the limit".
    
    2609
    +  */
    
    2610
    +
    
    2611
    +  const float tol = 3.0f;
    
    2612
    +  int         n   = (int)floor( sqrt( sqrt( tol * devsq ) ) )/8;
    
    2613
    +  FT_Vector p      = aP0;
    
    2614
    +  float     nrecip = 1.0f / ( n + 1.0f );
    
    2615
    +  float     t      = 0.0f;
    
    2616
    +  for ( int i = 0; i < n; i++ )
    
    2617
    +  {
    
    2618
    +    t += nrecip;
    
    2619
    +    FT_Vector next = Lerp( t, Lerp( t, aP0, aP1 ), Lerp( t, aP1, aP2 ) );
    
    2620
    +    FT_PreLine pl4  = malloc(sizeof(FT_PreLineRec));
    
    2621
    +            pl4->x1       = (*ptr)->x2;
    
    2622
    +            pl4->y1       = (*ptr)->y2;
    
    2623
    +            pl4->x2       = next.x;
    
    2624
    +            pl4->y2       = next.y;
    
    2625
    +            pl4->next     = NULL;
    
    2626
    +            (*ptr)->next  = pl4;
    
    2627
    +            *ptr          = (*ptr)->next;
    
    2628
    +            p              = next;
    
    2629
    +  }
    
    2630
    +
    
    2631
    +  FT_PreLine pl5          = malloc(sizeof(FT_PreLineRec));
    
    2632
    +            pl5->x1       = (*ptr)->x2;
    
    2633
    +            pl5->y1       = (*ptr)->y2;
    
    2634
    +            pl5->x2       = aP2.x;
    
    2635
    +            pl5->y2       = aP2.y;
    
    2636
    +            pl5->next     = NULL;
    
    2637
    +            (*ptr)->next  = pl5;
    
    2638
    +            *ptr          = (*ptr)->next;
    
    2639
    +  return 0;
    
    2640
    +}
    
    2641
    +
    
    2642
    +/**
    
    2643
    + * Convert the outline data of slot to prelines
    
    2644
    +*/
    
    2645
    +FT_Error ft_decompose_outline(FT_GlyphSlot* slot){
    
    2646
    +  FT_Vector   v_last;
    
    2647
    +  FT_Vector   v_control;
    
    2648
    +  FT_Vector   v_start;
    
    2649
    +
    
    2650
    +  FT_Vector*  point;
    
    2651
    +  FT_Vector*  limit;
    
    2652
    +  char*       tags;
    
    2653
    +
    
    2654
    +  FT_Error    error;
    
    2655
    +
    
    2656
    +  FT_Int   n;         /* index of contour in outline     */
    
    2657
    +  FT_Int   first;     /* index of first point in contour */
    
    2658
    +  FT_Int   last;      /* index of last point in contour  */
    
    2659
    +
    
    2660
    +  FT_Int   tag;       /* current point's state           */
    
    2661
    +
    
    2662
    +  FT_Int   shift;
    
    2663
    +  FT_Pos   delta;
    
    2664
    +
    
    2665
    +  FT_Outline* outline = &(*slot)->outline;
    
    2666
    +
    
    2667
    +  if ( !outline )
    
    2668
    +    return FT_THROW( Invalid_Outline );
    
    2669
    +  
    
    2670
    +  last = -1;
    
    2671
    +  FT_PreLine ptr = (*slot)->prelines;
    
    2672
    +
    
    2673
    +  for ( n = 0; n < outline->n_contours; n++ )
    
    2674
    +  {
    
    2675
    +    FT_TRACE5(( "ft_decompose_outline: Contour %d\n", n ));
    
    2676
    +
    
    2677
    +    first = last + 1;
    
    2678
    +    last  = outline->contours[n];
    
    2679
    +    if ( last < first ){
    
    2680
    +      FT_TRACE5(( "Invalid Outline"));
    
    2681
    +      break;
    
    2682
    +    }
    
    2683
    +    limit = outline->points + last;
    
    2684
    +
    
    2685
    +    v_start   = outline->points[first];
    
    2686
    +
    
    2687
    +
    
    2688
    +    v_last   = outline->points[last];
    
    2689
    +
    
    2690
    +    v_control = v_start;
    
    2691
    +
    
    2692
    +    point = outline->points + first;
    
    2693
    +    tags  = outline->tags   + first;
    
    2694
    +    tag   = FT_CURVE_TAG( tags[0] );
    
    2695
    +
    
    2696
    +    /* A contour cannot start with a cubic control point! */
    
    2697
    +    if ( tag == FT_CURVE_TAG_CUBIC )
    
    2698
    +    {
    
    2699
    +      FT_TRACE5(( "Invalid Outline"));
    
    2700
    +      break;
    
    2701
    +    }
    
    2702
    +    /* check first point to determine origin */
    
    2703
    +    if ( tag == FT_CURVE_TAG_CONIC )
    
    2704
    +    {
    
    2705
    +      /* first point is conic control.  Yes, this happens. */
    
    2706
    +      if ( FT_CURVE_TAG( outline->tags[last] ) == FT_CURVE_TAG_ON )
    
    2707
    +      {
    
    2708
    +        /* start at last point if it is on the curve */
    
    2709
    +        v_start = v_last;
    
    2710
    +        limit--;
    
    2711
    +      }
    
    2712
    +      else
    
    2713
    +      {
    
    2714
    +        /* if both first and last points are conic,         */
    
    2715
    +        /* start at their middle and record its position    */
    
    2716
    +        /* for closure                                      */
    
    2717
    +        v_start.x = ( v_start.x + v_last.x ) / 2;
    
    2718
    +        v_start.y = ( v_start.y + v_last.y ) / 2;
    
    2719
    +
    
    2720
    +      /* v_last = v_start; */
    
    2721
    +      }
    
    2722
    +      point--;
    
    2723
    +      tags--;
    
    2724
    +    }
    
    2725
    +    
    
    2726
    +    FT_TRACE5(( "  move to (%.2f, %.2f)\n",
    
    2727
    +                (double)v_start.x / 64, (double)v_start.y / 64 ));
    
    2728
    +
    
    2729
    +
    
    2730
    +    FT_PreLine pl  = malloc(sizeof(FT_PreLineRec));
    
    2731
    +          pl->x1 = v_start.x;
    
    2732
    +          pl->y1 = v_start.y;
    
    2733
    +          pl->x2 = v_start.x;
    
    2734
    +          pl->y2 = v_start.y;
    
    2735
    +          pl->next = NULL;
    
    2736
    +
    
    2737
    +          if ( ( *slot )->prelines == NULL )
    
    2738
    +          {
    
    2739
    +            ptr = ( *slot )->prelines = pl;
    
    2740
    +          }
    
    2741
    +          else
    
    2742
    +          {
    
    2743
    +            ptr->next = pl;
    
    2744
    +            ptr       = ptr->next;
    
    2745
    +          }
    
    2746
    +
    
    2747
    +    while ( point < limit )
    
    2748
    +    {
    
    2749
    +      point++;
    
    2750
    +      tags++;
    
    2751
    +
    
    2752
    +      tag = FT_CURVE_TAG( tags[0] );
    
    2753
    +      switch ( tag )
    
    2754
    +      {
    
    2755
    +      case FT_CURVE_TAG_ON:  /* emit a single line_to */
    
    2756
    +        {
    
    2757
    +          FT_Vector  vec;
    
    2758
    +
    
    2759
    +
    
    2760
    +          vec.x = point->x;
    
    2761
    +          vec.y = point->y;
    
    2762
    +
    
    2763
    +          FT_TRACE5(( "  line to (%.2f, %.2f)\n",
    
    2764
    +                      (double)vec.x / 64, (double)vec.y / 64 ));
    
    2765
    +
    
    2766
    +          FT_PreLine pl3  = malloc(sizeof(FT_PreLineRec));
    
    2767
    +          pl3->x1 = ptr->x2;
    
    2768
    +          pl3->y1 = ptr->y2;
    
    2769
    +          pl3->x2 = vec.x;
    
    2770
    +          pl3->y2 = vec.y;
    
    2771
    +          pl3->next = NULL;
    
    2772
    +          ptr->next = pl3;
    
    2773
    +          ptr = ptr->next;
    
    2774
    +          continue;
    
    2775
    +        }
    
    2776
    +      
    
    2777
    +      case FT_CURVE_TAG_CONIC:  /* consume conic arcs */
    
    2778
    +        v_control.x =  point->x ;
    
    2779
    +        v_control.y = point->y ;
    
    2780
    +
    
    2781
    +      Do_Conic:
    
    2782
    +        if ( point < limit )
    
    2783
    +        {
    
    2784
    +          FT_Vector  vec;
    
    2785
    +          FT_Vector  v_middle;
    
    2786
    +
    
    2787
    +
    
    2788
    +          point++;
    
    2789
    +          tags++;
    
    2790
    +          tag = FT_CURVE_TAG( tags[0] );
    
    2791
    +
    
    2792
    +          vec.x = point->x;
    
    2793
    +          vec.y = point->y;
    
    2794
    +
    
    2795
    +          if ( tag == FT_CURVE_TAG_ON )
    
    2796
    +          {
    
    2797
    +            FT_TRACE5(( "  conic to (%.2f, %.2f)"
    
    2798
    +                        " with control (%.2f, %.2f)\n",
    
    2799
    +                        (double)vec.x / 64,
    
    2800
    +                        (double)vec.y / 64,
    
    2801
    +                        (double)v_control.x / 64,
    
    2802
    +                        (double)v_control.y / 64 ));
    
    2803
    +            FT_Vector vex0 = {ptr->x2, ptr->y2};
    
    2804
    +            error = conic_to2(slot, &v_control, &vex0,&vec , &ptr);
    
    2805
    + 
    
    2806
    +            continue;
    
    2807
    +          }
    
    2808
    +
    
    2809
    +          if ( tag != FT_CURVE_TAG_CONIC )
    
    2810
    +          {
    
    2811
    +            FT_TRACE5( ( "Invalid Outline" ) );
    
    2812
    +            break;
    
    2813
    +          }
    
    2814
    +          v_middle.x = ( v_control.x + vec.x ) / 2;
    
    2815
    +          v_middle.y = ( v_control.y + vec.y ) / 2;
    
    2816
    +
    
    2817
    +          FT_TRACE5(( "  conic to (%.2f, %.2f)"
    
    2818
    +                      " with control (%.2f, %.2f)\n",
    
    2819
    +                      (double)v_middle.x / 64,
    
    2820
    +                      (double)v_middle.y / 64,
    
    2821
    +                      (double)v_control.x / 64,
    
    2822
    +                      (double)v_control.y / 64 ));
    
    2823
    +          FT_Vector vex = {ptr->x2, ptr->y2};
    
    2824
    +          error = conic_to2(slot, &v_control, &vex,&v_middle, &ptr);
    
    2825
    +
    
    2826
    +          v_control = vec;
    
    2827
    +          goto Do_Conic;
    
    2828
    +        }
    
    2829
    +
    
    2830
    +        FT_TRACE5(( "  conic to (%.2f, %.2f)"
    
    2831
    +                    " with control (%.2f, %.2f)\n",
    
    2832
    +                    (double)v_start.x / 64,
    
    2833
    +                    (double)v_start.y / 64,
    
    2834
    +                    (double)v_control.x / 64,
    
    2835
    +                    (double)v_control.y / 64 ));
    
    2836
    +        FT_Vector vex2 = {ptr->x2, ptr->y2};
    
    2837
    +        error = conic_to2( slot, &v_control, &vex2, &v_start, &ptr );
    
    2838
    +      }
    
    2839
    +    }
    
    2840
    +
    
    2841
    +    /* close the contour with a line segment */
    
    2842
    +    FT_TRACE5(( "  line to (%.2f, %.2f)\n",
    
    2843
    +                 (double)v_start.x / 64, (double)v_start.y / 64 ));
    
    2844
    +    FT_PreLine pl2  = malloc(sizeof(FT_PreLineRec));
    
    2845
    +    pl2->x1 = ptr->x2;
    
    2846
    +    pl2->y1 = ptr->y2;
    
    2847
    +    pl2->x2 = v_start.x;
    
    2848
    +    pl2->y2 = v_start.y;
    
    2849
    +    pl2->next = NULL;
    
    2850
    +    ptr->next = pl2;
    
    2851
    +    ptr = ptr->next;
    
    2852
    +    
    
    2853
    +  }
    
    2854
    +
    
    2855
    +  return 0;
    
    2856
    +}
    
    2857
    +
    
    2522 2858
       static FT_Error
    
    2523 2859
       ft_open_face_internal( FT_Library           library,
    
    2524 2860
                              const FT_Open_Args*  args,
    
    ... ... @@ -2748,6 +3084,33 @@
    2748 3084
     
    
    2749 3085
             face->size = size;
    
    2750 3086
           }
    
    3087
    +      if ( args->size > 0 )
    
    3088
    +      {
    
    3089
    +        face->glyph_array = (FT_GlyphSlot*)malloc(
    
    3090
    +            face->driver->clazz->slot_object_size * face->num_glyphs );
    
    3091
    +        error = FT_Set_Pixel_Sizes( face, 0, args->size );
    
    3092
    +
    
    3093
    +        for ( int gindex = 0; gindex < face->num_glyphs; gindex++ )
    
    3094
    +        {
    
    3095
    +          driver                = face->driver;
    
    3096
    +          FT_Driver_Class clazz = driver->clazz;
    
    3097
    +          memory                = driver->root.memory;
    
    3098
    +
    
    3099
    +          FT_ALLOC( face->glyph_array[gindex], clazz->slot_object_size );
    
    3100
    +          
    
    3101
    +          face->glyph_array[gindex]->face         = face;
    
    3102
    +          face->glyph_array[gindex]->prel_shifted = 0;
    
    3103
    +          face->glyph_array[gindex]->glyph_index  = gindex;
    
    3104
    +          ft_glyphslot_init( face->glyph_array[gindex] );
    
    3105
    +
    
    3106
    +          face->glyph_array[gindex]->next = NULL;
    
    3107
    +          *face->glyph                    = *face->glyph_array[gindex];
    
    3108
    +
    
    3109
    +          FT_Load_Glyph( face, gindex, FT_LOAD_NO_HINTING );
    
    3110
    +
    
    3111
    +          ft_decompose_outline( &face->glyph_array[gindex] );
    
    3112
    +        }
    
    3113
    +      }
    
    2751 3114
         }
    
    2752 3115
     
    
    2753 3116
         /* some checks */
    

  • src/dense/ftdense.c
    ... ... @@ -16,15 +16,25 @@
    16 16
         defined( __x86_64__ )                        || \
    
    17 17
         defined( _M_AMD64 )                          || \
    
    18 18
         ( defined( _M_IX86_FP ) && _M_IX86_FP >= 2 )
    
    19
    -#  define FT_SSE4_1 1
    
    19
    +  #define FT_SSE4_1 1
    
    20 20
     #else
    
    21
    -#  define FT_SSE4_1 0
    
    21
    +  #define FT_SSE4_1 0
    
    22
    +#endif
    
    23
    +
    
    24
    +#if defined(__ARM_NEON)
    
    25
    +  #define FT_NEON 1
    
    26
    +#else
    
    27
    +  #define FT_NEON 0
    
    22 28
     #endif
    
    23 29
     
    
    24 30
     
    
    25 31
     #if FT_SSE4_1
    
    26 32
     
    
    27
    -    #include <tmmintrin.h>
    
    33
    +  #include <immintrin.h>
    
    34
    +
    
    35
    +#elif FT_NEON
    
    36
    +
    
    37
    +  #include <arm_neon.h>
    
    28 38
     
    
    29 39
     #endif
    
    30 40
     
    
    ... ... @@ -41,6 +51,11 @@
    41 51
     #define FT_MAX( a, b )  ( (a) > (b) ? (a) : (b) )
    
    42 52
     #define FT_ABS( a )     ( (a) < 0 ? -(a) : (a) )
    
    43 53
     
    
    54
    +// TODO: Fix types
    
    55
    +#define FT_UDIVPREP( c, b )                                \
    
    56
    +  FT26D6  b ## _r = c ? (FT26D6)0xFFFFFFFF / ( b ) : 0
    
    57
    +#define FT_UDIV( a, b )                                           \
    
    58
    +  (FT26D6)( ( (FT26D6)( a ) * (FT26D6)( b ## _r ) ) >> 32 )
    
    44 59
     
    
    45 60
     typedef struct dense_TRaster_
    
    46 61
     {
    
    ... ... @@ -79,108 +94,319 @@ dense_line_to( const FT_Vector* to, dense_worker* worker )
    79 94
     }
    
    80 95
     
    
    81 96
     void
    
    82
    -dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
    
    97
    +dense_render_line2( dense_worker* worker, FT_PreLine pl )
    
    83 98
     {
    
    84
    -  float from_x = worker->prev_x;
    
    85
    -  float from_y = worker->prev_y;
    
    86
    -  if ( from_y == toy )
    
    87
    -    return;
    
    88 99
     
    
    100
    +  FT26D6 fx = UPSCALE(pl->x1)>>2;
    
    101
    +  FT26D6 fy = UPSCALE(pl->y1)>>2;
    
    89 102
     
    
    90
    -  from_x /= 256.0;
    
    91
    -  from_y /= 256.0;
    
    92
    -  float to_x = tox / 256.0;
    
    93
    -  float to_y = toy / 256.0;
    
    103
    +  FT26D6 from_x = fx;
    
    104
    +  FT26D6 from_y = fy;
    
    94 105
     
    
    95 106
     
    
    96
    -  float dir;
    
    97
    -  if ( from_y < to_y )
    
    98
    -    dir = 1;
    
    99
    -  else
    
    107
    +  FT26D6 tx = UPSCALE(pl->x2)>>2;
    
    108
    +  FT26D6 ty = UPSCALE(pl->y2)>>2;
    
    109
    +
    
    110
    +  if ( fy == ty )
    
    111
    +    return;
    
    112
    +
    
    113
    +  FT26D6 to_x = tx;
    
    114
    +  FT26D6 to_y = ty;
    
    115
    +
    
    116
    +  int dir = 1;
    
    117
    +  if ( from_y >= to_y )
    
    100 118
       {
    
    101 119
         dir = -1;
    
    102
    -    FT_SWAP(from_x, to_x );
    
    103
    -    FT_SWAP(from_y, to_y );
    
    120
    +    FT_SWAP(from_x, to_x);
    
    121
    +    FT_SWAP(from_y, to_y);
    
    104 122
       }
    
    105 123
     
    
    106 124
       // Clip to the height.
    
    107
    -  if ( from_y >= worker->m_h || to_y <= 0 )
    
    125
    +  if ( from_y >= worker->m_h<<6 || to_y <= 0 )
    
    108 126
         return;
    
    109 127
     
    
    110
    -  float dxdy = ( to_x - from_x ) / (float)( to_y - from_y );
    
    128
    +  FT26D6 deltax,deltay;
    
    129
    +  deltax = to_x - from_x;
    
    130
    +  deltay = to_y - from_y;
    
    131
    +
    
    132
    +    FT_UDIVPREP(from_x != to_x, deltax);
    
    133
    +
    
    134
    +    FT_UDIVPREP(from_y != to_y, deltay);
    
    135
    +
    
    111 136
       if ( from_y < 0 )
    
    112 137
       {
    
    113
    -    from_x -= from_y * dxdy;
    
    138
    +    from_x -= from_y * deltax/deltay;
    
    114 139
         from_y = 0;
    
    115 140
       }
    
    116
    -  if ( to_y > worker->m_h )
    
    141
    +
    
    142
    +  if ( to_y > worker->m_h<<6 )
    
    117 143
       {
    
    118
    -    to_x -= ( to_y - worker->m_h ) * dxdy;
    
    119
    -    to_y = (float)worker->m_h;
    
    144
    +    to_x -= (( to_y - worker->m_h<<6 ) * deltax/deltay);
    
    145
    +    to_y = worker->m_h<<6;
    
    120 146
       }
    
    121 147
     
    
    122
    -  float  x       = from_x;
    
    123
    -  int    y0      = (int)from_y;
    
    124
    -  int    y_limit = (int)ceil( to_y );
    
    125
    -  float* m_a     = worker->m_a;
    
    126 148
     
    
    127
    -  for ( int y = y0; y < y_limit; y++ )
    
    128
    -  {
    
    129
    -    int   linestart = y * worker->m_w;
    
    130
    -    float dy        = fmin( y + 1.0f, to_y ) - fmax( (float)y, from_y );
    
    131
    -    float xnext     = x + dxdy * dy;
    
    132
    -    float d         = dy * dir;
    
    149
    +  if(deltax == 0){
    
    150
    +    FT26D6 x       = from_x;
    
    151
    +    int   x0i    = x>>6;
    
    152
    +    FT26D6 x0floor = x0i<<6;
    
    153
    +
    
    154
    +    // y-coordinate of first pixel of line
    
    155
    +    int    y0      = from_y>>6;
    
    156
    +
    
    157
    +    // y-coordinate one past the last pixel row of the line (exclusive bound)
    
    158
    +    int    y_limit = (to_y + 0x3f)>>6;
    
    159
    +    FT20D12* m_a   = worker->m_a;
    
    133 160
     
    
    134
    -    float x0, x1;
    
    135
    -    if ( x < xnext )
    
    161
    +
    
    162
    +
    
    163
    +    for ( int y = y0; y < y_limit; y++ )
    
    136 164
         {
    
    137
    -      x0 = x;
    
    138
    -      x1 = xnext;
    
    165
    +      int linestart = y * worker->m_w;
    
    166
    +
    
    167
    +     FT26D6 dy   = FT_MIN( (y + 1)<<6, to_y ) - FT_MAX( y<<6, from_y );
    
    168
    +
    
    169
    +      m_a[linestart + x0i] += dir*dy*(64 - x + x0floor);
    
    170
    +      m_a[linestart + ( x0i + 1 )] += dir*dy*(x-x0floor);
    
    171
    +
    
    139 172
         }
    
    140
    -    else
    
    173
    +  }
    
    174
    +  else
    
    175
    +  {
    
    176
    +    int    x       = from_x;
    
    177
    +    int    y0      = from_y>>6;
    
    178
    +    int    y_limit = (to_y + 0x3f)>>6;
    
    179
    +
    
    180
    +    FT20D12* m_a     = worker->m_a;
    
    181
    +
    
    182
    +    for ( int y = y0; y < y_limit; y++ )
    
    141 183
         {
    
    142
    -      x0 = xnext;
    
    143
    -      x1 = x;
    
    184
    +      int   linestart = y * worker->m_w;
    
    185
    +      FT26D6 dy        = FT_MIN( (y + 1)<<6, to_y ) - FT_MAX( y<<6, from_y );
    
    186
    +      FT26D6 xnext     = x + FT_UDIV((dy*deltax), deltay);
    
    187
    +      FT26D6 d         = dy * dir;
    
    188
    +
    
    189
    +      FT26D6 x0, x1;
    
    190
    +      if ( x < xnext )
    
    191
    +      {
    
    192
    +        x0 = x;
    
    193
    +        x1 = xnext;
    
    194
    +      }
    
    195
    +      else
    
    196
    +      {
    
    197
    +        x0 = xnext;
    
    198
    +        x1 = x;
    
    199
    +      }
    
    200
    +
    
    201
    +
    
    202
    +      int   x0i    = x0>>6;
    
    203
    +      FT26D6 x0floor = x0i<<6;
    
    204
    +
    
    205
    +
    
    206
    +      int   x1i    = (x1+0x3f)>>6;
    
    207
    +      FT26D6 x1ceil =  x1i <<6;
    
    208
    +
    
    209
    +      if ( x1i <= x0i + 1 )
    
    210
    +      {
    
    211
    +        FT26D6 xmf = ( ( x + xnext )>>1) - x0floor;
    
    212
    +        m_a[linestart + x0i] += d * ((1<<6) - xmf);
    
    213
    +        m_a[linestart + ( x0i + 1 )] += d * xmf;
    
    214
    +      }
    
    215
    +      else
    
    216
    +      {
    
    217
    +
    
    218
    +        FT26D6 oneOverS = x1 - x0;
    
    219
    +
    
    220
    +        FT_UDIVPREP(x1 != x0, oneOverS);
    
    221
    +
    
    222
    +        FT26D6 x0f = x0 - x0floor;
    
    223
    +
    
    224
    +
    
    225
    +        FT26D6 oneMinusX0f = (1<<6) - x0f;
    
    226
    +        FT26D6 a0 = FT_UDIV(((oneMinusX0f * oneMinusX0f) >> 1), oneOverS);
    
    227
    +        FT26D6 x1f = x1 - x1ceil + (1<<6);
    
    228
    +        FT26D6 am =  FT_UDIV(((x1f * x1f) >> 1) , oneOverS);
    
    229
    +
    
    230
    +        m_a[linestart + x0i] += d * a0;
    
    231
    +
    
    232
    +        if ( x1i == x0i + 2 )
    
    233
    +          m_a[linestart + ( x0i + 1 )] += d * ( (1<<6) - a0 - am );
    
    234
    +        else
    
    235
    +        {
    
    236
    +          FT26D6 a1 =  FT_UDIV((((1<<6) + (1<<5) - x0f) << 6) , oneOverS);
    
    237
    +          m_a[linestart + ( x0i + 1 )] += d * ( a1 - a0 );
    
    238
    +
    
    239
    +          FT26D6 dTimesS =  FT_UDIV((d << 12) , oneOverS);
    
    240
    +
    
    241
    +          for ( FT26D6 xi = x0i + 2; xi < x1i - 1; xi++ )
    
    242
    +            m_a[linestart + xi] += dTimesS;
    
    243
    +
    
    244
    +          FT26D6 a2 = a1 +  FT_UDIV((( x1i - x0i - 3 )<<12),oneOverS);
    
    245
    +          m_a[linestart + ( x1i - 1 )] += d * ( (1<<6) - a2 - am );
    
    246
    +        }
    
    247
    +        m_a[linestart + x1i] += d * am;
    
    248
    +      }
    
    249
    +      x = xnext;
    
    144 250
         }
    
    251
    +  }
    
    252
    +}
    
    253
    +
    
    254
    +
    
    255
    +void
    
    256
    +dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
    
    257
    +{
    
    258
    +
    
    259
    +  FT26D6 fx = worker->prev_x>>2;
    
    260
    +  FT26D6 fy = worker->prev_y>>2;
    
    261
    +
    
    262
    +  FT26D6 from_x = fx;
    
    263
    +  FT26D6 from_y = fy;
    
    264
    +
    
    265
    +
    
    266
    +  FT26D6 tx = tox>>2;
    
    267
    +  FT26D6 ty = toy>>2;
    
    268
    +
    
    269
    +  if ( fy == ty )
    
    270
    +    return;
    
    271
    +
    
    272
    +  FT26D6 to_x = tx;
    
    273
    +  FT26D6 to_y = ty;
    
    274
    +
    
    275
    +  int dir = 1;
    
    276
    +  if ( from_y >= to_y )
    
    277
    +  {
    
    278
    +    dir = -1;
    
    279
    +    FT_SWAP(from_x, to_x);
    
    280
    +    FT_SWAP(from_y, to_y);
    
    281
    +  }
    
    282
    +
    
    283
    +  // Clip to the height.
    
    284
    +  if ( from_y >= worker->m_h<<6 || to_y <= 0 )
    
    285
    +    return;
    
    145 286
     
    
    146
    -    /*
    
    147
    -    It's possible for x0 to be negative on the last scanline because of
    
    148
    -    floating-point inaccuracy That would cause an out-of-bounds array access at
    
    149
    -    index -1.
    
    150
    -    */
    
    151
    -    float x0floor = x0 <= 0.0f ? 0.0f : (float)floor( x0 );
    
    152
    -
    
    153
    -    int   x0i    = (int)x0floor;
    
    154
    -    float x1ceil = (float)ceil( x1 );
    
    155
    -    int   x1i    = (int)x1ceil;
    
    156
    -    if ( x1i <= x0i + 1 )
    
    287
    +  FT26D6 deltax,deltay;
    
    288
    +  deltax = to_x - from_x;
    
    289
    +  deltay = to_y - from_y;
    
    290
    +
    
    291
    +    FT_UDIVPREP(from_x != to_x, deltax);
    
    292
    +
    
    293
    +    FT_UDIVPREP(from_y != to_y, deltay);
    
    294
    +
    
    295
    +  if ( from_y < 0 )
    
    296
    +  {
    
    297
    +    from_x -= from_y * deltax/deltay;
    
    298
    +    from_y = 0;
    
    299
    +  }
    
    300
    +
    
    301
    +  if ( to_y > worker->m_h<<6 )
    
    302
    +  {
    
    303
    +    to_x -= (( to_y - worker->m_h<<6 ) * deltax/deltay);
    
    304
    +    to_y = worker->m_h<<6;
    
    305
    +  }
    
    306
    +
    
    307
    +
    
    308
    +  if(deltax == 0){
    
    309
    +    FT26D6 x       = from_x;
    
    310
    +    int   x0i    = x>>6;
    
    311
    +    FT26D6 x0floor = x0i<<6;
    
    312
    +
    
    313
    +    // y-coordinate of first pixel of line
    
    314
    +    int    y0      = from_y>>6;
    
    315
    +
    
    316
    +    // y-coordinate one past the last pixel row of the line (exclusive bound)
    
    317
    +    int    y_limit = (to_y + 0x3f)>>6;
    
    318
    +    FT20D12* m_a   = worker->m_a;
    
    319
    +
    
    320
    +
    
    321
    +
    
    322
    +    for ( int y = y0; y < y_limit; y++ )
    
    157 323
         {
    
    158
    -      float xmf = 0.5f * ( x + xnext ) - x0floor;
    
    159
    -      m_a[linestart + x0i] += d - d * xmf;
    
    160
    -      m_a[linestart + ( x0i + 1 )] += d * xmf;
    
    324
    +      int linestart = y * worker->m_w;
    
    325
    +
    
    326
    +     FT26D6 dy   = FT_MIN( (y + 1)<<6, to_y ) - FT_MAX( y<<6, from_y );
    
    327
    +
    
    328
    +      m_a[linestart + x0i] += dir*dy*(64 - x + x0floor);
    
    329
    +      m_a[linestart + ( x0i + 1 )] += dir*dy*(x-x0floor);
    
    330
    +
    
    161 331
         }
    
    162
    -    else
    
    332
    +  }
    
    333
    +  else
    
    334
    +  {
    
    335
    +    int    x       = from_x;
    
    336
    +    int    y0      = from_y>>6;
    
    337
    +    int    y_limit = (to_y + 0x3f)>>6;
    
    338
    +
    
    339
    +    FT20D12* m_a     = worker->m_a;
    
    340
    +
    
    341
    +    for ( int y = y0; y < y_limit; y++ )
    
    163 342
         {
    
    164
    -      float s   = 1.0f / ( x1 - x0 );
    
    165
    -      float x0f = x0 - x0floor;
    
    166
    -      float a0  = 0.5f * s * ( 1.0f - x0f ) * ( 1.0f - x0f );
    
    167
    -      float x1f = x1 - x1ceil + 1.0f;
    
    168
    -      float am  = 0.5f * s * x1f * x1f;
    
    169
    -      m_a[linestart + x0i] += d * a0;
    
    170
    -      if ( x1i == x0i + 2 )
    
    171
    -        m_a[linestart + ( x0i + 1 )] += d * ( 1.0f - a0 - am );
    
    343
    +      int   linestart = y * worker->m_w;
    
    344
    +      FT26D6 dy        = FT_MIN( (y + 1)<<6, to_y ) - FT_MAX( y<<6, from_y );
    
    345
    +      FT26D6 xnext     = x + FT_UDIV((dy*deltax), deltay);
    
    346
    +      FT26D6 d         = dy * dir;
    
    347
    +
    
    348
    +      FT26D6 x0, x1;
    
    349
    +      if ( x < xnext )
    
    350
    +      {
    
    351
    +        x0 = x;
    
    352
    +        x1 = xnext;
    
    353
    +      }
    
    354
    +      else
    
    355
    +      {
    
    356
    +        x0 = xnext;
    
    357
    +        x1 = x;
    
    358
    +      }
    
    359
    +
    
    360
    +
    
    361
    +      int   x0i    = x0>>6;
    
    362
    +      FT26D6 x0floor = x0i<<6;
    
    363
    +
    
    364
    +
    
    365
    +      int   x1i    = (x1+0x3f)>>6;
    
    366
    +      FT26D6 x1ceil =  x1i <<6;
    
    367
    +
    
    368
    +      if ( x1i <= x0i + 1 )
    
    369
    +      {
    
    370
    +        FT26D6 xmf = ( ( x + xnext )>>1) - x0floor;
    
    371
    +        m_a[linestart + x0i] += d * ((1<<6) - xmf);
    
    372
    +        m_a[linestart + ( x0i + 1 )] += d * xmf;
    
    373
    +      }
    
    172 374
           else
    
    173 375
           {
    
    174
    -        float a1 = s * ( 1.5f - x0f );
    
    175
    -        m_a[linestart + ( x0i + 1 )] += d * ( a1 - a0 );
    
    176
    -        for ( int xi = x0i + 2; xi < x1i - 1; xi++ )
    
    177
    -          m_a[linestart + xi] += d * s;
    
    178
    -        float a2 = a1 + ( x1i - x0i - 3 ) * s;
    
    179
    -        m_a[linestart + ( x1i - 1 )] += d * ( 1.0f - a2 - am );
    
    376
    +
    
    377
    +        FT26D6 oneOverS = x1 - x0;
    
    378
    +
    
    379
    +        FT_UDIVPREP(x1 != x0, oneOverS);
    
    380
    +
    
    381
    +        FT26D6 x0f = x0 - x0floor;
    
    382
    +
    
    383
    +
    
    384
    +        FT26D6 oneMinusX0f = (1<<6) - x0f;
    
    385
    +        FT26D6 a0 = FT_UDIV(((oneMinusX0f * oneMinusX0f) >> 1), oneOverS);
    
    386
    +        FT26D6 x1f = x1 - x1ceil + (1<<6);
    
    387
    +        FT26D6 am =  FT_UDIV(((x1f * x1f) >> 1) , oneOverS);
    
    388
    +
    
    389
    +        m_a[linestart + x0i] += d * a0;
    
    390
    +
    
    391
    +        if ( x1i == x0i + 2 )
    
    392
    +          m_a[linestart + ( x0i + 1 )] += d * ( (1<<6) - a0 - am );
    
    393
    +        else
    
    394
    +        {
    
    395
    +          FT26D6 a1 =  FT_UDIV((((1<<6) + (1<<5) - x0f) << 6) , oneOverS);
    
    396
    +          m_a[linestart + ( x0i + 1 )] += d * ( a1 - a0 );
    
    397
    +
    
    398
    +          FT26D6 dTimesS =  FT_UDIV((d << 12) , oneOverS);
    
    399
    +
    
    400
    +          for ( FT26D6 xi = x0i + 2; xi < x1i - 1; xi++ )
    
    401
    +            m_a[linestart + xi] += dTimesS;
    
    402
    +
    
    403
    +          FT26D6 a2 = a1 +  FT_UDIV((( x1i - x0i - 3 )<<12),oneOverS);
    
    404
    +          m_a[linestart + ( x1i - 1 )] += d * ( (1<<6) - a2 - am );
    
    405
    +        }
    
    406
    +        m_a[linestart + x1i] += d * am;
    
    180 407
           }
    
    181
    -      m_a[linestart + x1i] += d * am;
    
    408
    +      x = xnext;
    
    182 409
         }
    
    183
    -    x = xnext;
    
    184 410
       }
    
    185 411
     }
    
    186 412
     
    
    ... ... @@ -359,53 +585,92 @@ FT_DEFINE_OUTLINE_FUNCS( dense_decompose_funcs,
    359 585
     )
    
    360 586
     
    
    361 587
     static int
    
    362
    -dense_render_glyph( dense_worker* worker, const FT_Bitmap* target )
    
    588
    +dense_render_glyph( dense_worker* worker, const FT_Bitmap* target, FT_PreLine pl )
    
    363 589
     {
    
    364
    -  FT_Error error = FT_Outline_Decompose( &( worker->outline ),
    
    365
    -                                         &dense_decompose_funcs, worker );
    
    590
    +  FT_Error error = 0;
    
    591
    +
    
    592
    +  while (pl != NULL)
    
    593
    +  {
    
    594
    +    dense_render_line2(worker, pl);
    
    595
    +    pl = pl->next;
    
    596
    +  }
    
    597
    +
    
    366 598
       // Render into bitmap
    
    367
    -  const float* source = worker->m_a;
    
    599
    +  const FT20D12* source = worker->m_a;
    
    368 600
       unsigned char* dest     = target->buffer;
    
    369 601
       unsigned char* dest_end = target->buffer + worker->m_w * worker->m_h;
    
    370 602
     
    
    371 603
     #if FT_SSE4_1
    
    372 604
     
    
    373
    -  __m128 offset = _mm_setzero_ps();
    
    374
    -  __m128i mask = _mm_set1_epi32(0x0c080400);
    
    375
    -  __m128 sign_mask = _mm_set1_ps(-0.f);
    
    376
    -  for (int i = 0; i < worker->m_h*worker->m_w; i += 4) {
    
    377
    -    __m128 x = _mm_load_ps(&source[i]);
    
    378
    -    x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
    
    379
    -    x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40));
    
    380
    -    x = _mm_add_ps(x, offset);
    
    381
    -    __m128 y = _mm_andnot_ps(sign_mask, x);  // fabs(x)
    
    382
    -    y = _mm_min_ps(y, _mm_set1_ps(1.0f));
    
    383
    -    y = _mm_mul_ps(y, _mm_set1_ps(255.0f));
    
    384
    -    __m128i z = _mm_cvtps_epi32(y);
    
    385
    -    z = _mm_shuffle_epi8(z, mask);
    
    386
    -    _mm_store_ss((float *)&dest[i], (__m128)z);
    
    387
    -    offset = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 3, 3, 3));
    
    605
    +  __m128i offset = _mm_setzero_si128();
    
    606
    +  __m128i nzero = _mm_castps_si128(_mm_set1_ps(-0.0));
    
    607
    +
    
    608
    +  for (int i = 0; i < worker->m_h*worker->m_w; i += 4)
    
    609
    +  {
    
    610
    +    // load 4 fixed-point ints from source
    
    611
    +
    
    612
    +    __m128i x = _mm_load_si128( (__m128i*)&source[i] );
    
    613
    +
    
    614
    +    x = _mm_add_epi32( x, _mm_slli_si128( x, 4 ) );
    
    615
    +
    
    616
    +    x = _mm_add_epi32( x, _mm_slli_si128( x, 8 ) );
    
    617
    +
    
    618
    +    // add the prefix sum of previous 4 ints to all ints
    
    619
    +    x = _mm_add_epi32( x, offset );
    
    620
    +
    
    621
    +    // take absolute value and scale down by shifting right 4 bits
    
    622
    +    __m128i y = _mm_srli_epi32( _mm_abs_epi32( x) , 4 );
    
    623
    +    y = _mm_packus_epi16(_mm_packs_epi32(y, nzero), nzero);
    
    624
    +    _mm_storeu_si32(&dest[i], y);
    
    625
    +
    
    626
    +    // store the current prefix sum in offset
    
    627
    +    offset = _mm_shuffle_epi32(x,_MM_SHUFFLE( 3, 3, 3, 3 ) );
    
    388 628
       }
    
    629
    +#elif FT_NEON
    
    630
    +  int32x4_t offset = vdupq_n_s32(0);
    
    631
    +  int32x4_t nzero =  vreinterpretq_s32_f32(vdupq_n_f32(-0.0));
    
    632
    +
    
    633
    +  for (int i = 0; i < worker->m_h*worker->m_w; i += 4)
    
    634
    +  {
    
    635
    +    // load 4 fixed-point ints from source
    
    636
    +
    
    637
    +    int32x4_t x = vld1q_s32( (int32_t*)&source[i] );
    
    638
    +
    
    639
    +    x = vaddq_s32( x, vreinterpretq_s32_s8(vextq_s8(vdupq_n_s8(0), vreinterpretq_s8_s32( x), 12) ));
    
    640
    +
    
    641
    +    x = vaddq_s32(x, vreinterpretq_s32_s8(vextq_s8(vdupq_n_s8(0), vreinterpretq_s8_s32(x), 8)));
    
    642
    +
    
    643
    +    // add the prefix sum of the previous 4 ints to all current ints
    
    644
    +    x = vaddq_s32( x, offset );
    
    645
    +
    
    646
    +    int32x4_t y = vshrq_n_s32( vabsq_s32( x) , 4 );
    
    647
    +    y = vreinterpretq_s32_s16(vcombine_s16(vqmovn_s32(y), vqmovn_s32(nzero)));
    
    648
    +    y = vreinterpretq_s32_u8(vcombine_u8(vqmovun_s16(vreinterpretq_s16_s32(y)), vqmovun_s16(vreinterpretq_s16_s32(nzero))));
    
    389 649
     
    
    390
    -#else /* FT_SSE4_1 */
    
    650
    +    vst1q_s32(&dest[i], y);
    
    651
    +
    
    652
    +    offset = vdupq_laneq_s32(x,3 );
    
    653
    +  }
    
    654
    +#else
    
    655
    +
    
    656
    +  FT20D12 value = 0;
    
    391 657
     
    
    392
    -  float          value    = 0.0f;
    
    393 658
       while ( dest < dest_end )
    
    394 659
       {
    
    395 660
         value += *source++;
    
    396
    -    if ( value > 0.0f )
    
    397
    -    {
    
    398
    -      int n = (int)( fabs( value ) * 255.0f + 0.5f );
    
    399
    -      if ( n > 255 )
    
    400
    -        n = 255;
    
    661
    +
    
    662
    +    if(value > 0){
    
    663
    +      int n = value >>4;
    
    664
    +
    
    665
    +      if(n>255)n=255;
    
    401 666
           *dest = (unsigned char)n;
    
    402
    -    }
    
    403
    -    else
    
    667
    +    }else{
    
    404 668
           *dest = 0;
    
    669
    +    }
    
    405 670
         dest++;
    
    406 671
       }
    
    407 672
     
    
    408
    -#endif /* FT_SSE4_1 */
    
    673
    +#endif /* FT_SSE4_1 || FT_NEON */
    
    409 674
     
    
    410 675
       free(worker->m_a);
    
    411 676
       return error;
    
    ... ... @@ -416,6 +681,7 @@ dense_raster_render( FT_Raster raster, const FT_Raster_Params* params )
    416 681
     {
    
    417 682
       const FT_Outline* outline    = (const FT_Outline*)params->source;
    
    418 683
       FT_Bitmap*  target_map = params->target;
    
    684
    +  FT_PreLine pl = params->prelines;
    
    419 685
     
    
    420 686
       dense_worker worker[1];
    
    421 687
     
    
    ... ... @@ -442,12 +708,12 @@ dense_raster_render( FT_Raster raster, const FT_Raster_Params* params )
    442 708
       worker->m_w = target_map->pitch;
    
    443 709
       worker->m_h = target_map->rows;
    
    444 710
     
    
    445
    -  int size = worker->m_w * worker->m_h + 4;
    
    711
    +  int size = (worker->m_w * worker->m_h + 3) & ~3;
    
    446 712
     
    
    447
    -  worker->m_a      = malloc( sizeof( float ) * size );
    
    713
    +  worker->m_a      = malloc( sizeof( FT20D12 ) * size );
    
    448 714
       worker->m_a_size = size;
    
    449 715
     
    
    450
    -  memset( worker->m_a, 0, ( sizeof( float ) * size ) );
    
    716
    +  memset( worker->m_a, 0, ( sizeof( FT20D12 ) * size ) );
    
    451 717
       /* exit if nothing to do */
    
    452 718
       if ( worker->m_w <= worker->m_origin_x || worker->m_h <= worker->m_origin_y )
    
    453 719
       {
    
    ... ... @@ -457,7 +723,7 @@ dense_raster_render( FT_Raster raster, const FT_Raster_Params* params )
    457 723
       // Invert the pitch to account for different +ve y-axis direction in dense array
    
    458 724
       // (maybe temporary solution)
    
    459 725
       target_map->pitch *= -1;
    
    460
    -  return dense_render_glyph( worker, target_map );
    
    726
    +  return dense_render_glyph( worker, target_map, pl );
    
    461 727
     }
    
    462 728
     
    
    463 729
     FT_DEFINE_RASTER_FUNCS(
    

  • src/dense/ftdense.h
    ... ... @@ -19,10 +19,14 @@ extern "C"
    19 19
     {
    
    20 20
     #endif
    
    21 21
     
    
    22
    +
    
    23
    +  typedef signed long FT26D6;            /* 26.6 fixed-point representation  */
    
    24
    +  typedef signed int FT20D12;            /* 20.12 fixed-point representation  */
    
    25
    +
    
    22 26
       typedef struct
    
    23 27
       {
    
    24 28
         /** The array used to store signed area differences. */
    
    25
    -    float* m_a;
    
    29
    +    FT20D12* m_a;
    
    26 30
         /** The number of elements in m_a. */
    
    27 31
         int m_a_size;
    
    28 32
         /** The width of the current raster in pixels. */
    

  • src/dense/ftdenserend.c
    ... ... @@ -139,7 +139,8 @@
    139 139
     
    
    140 140
     
    
    141 141
         /* allocate new one */
    
    142
    -    if ( FT_ALLOC_MULT( bitmap->buffer, bitmap->rows, bitmap->pitch ) )
    
    142
    +    // ARM NEON crashes if memory is not aligned
    
    143
    +    if ( FT_ALLOC_MULT( bitmap->buffer, 1,bitmap->rows*bitmap->pitch + 16 ) )
    
    143 144
           goto Exit;
    
    144 145
     
    
    145 146
         slot->internal->flags |= FT_GLYPH_OWN_BITMAP;
    
    ... ... @@ -161,12 +162,25 @@
    161 162
         }
    
    162 163
     
    
    163 164
         /* translate outline to render it into the bitmap */
    
    164
    -    if ( x_shift || y_shift )
    
    165
    -      FT_Outline_Translate( outline, x_shift, y_shift );
    
    165
    +    if ( (x_shift || y_shift) && !slot->prel_shifted){
    
    166
    +      //FT_Outline_Translate( outline, x_shift, y_shift );
    
    167
    +      FT_PreLine pl = slot->prelines;
    
    168
    +      while (pl!=NULL)
    
    169
    +      {
    
    170
    +        pl->x1 += x_shift;
    
    171
    +        pl->y1 += y_shift;
    
    172
    +        pl->x2 += x_shift;
    
    173
    +        pl->y2 += y_shift;
    
    174
    +
    
    175
    +        pl = pl->next;
    
    176
    +      }
    
    177
    +      slot->prel_shifted = 1;
    
    178
    +    }
    
    166 179
     
    
    167 180
         /* set up parameters */
    
    168 181
         params.target = bitmap;
    
    169 182
         params.source = outline;
    
    183
    +    params.prelines = slot->prelines;
    
    170 184
     
    
    171 185
         /* render the outline */
    
    172 186
         error =
    
    ... ... @@ -184,8 +198,8 @@
    184 198
           slot->internal->flags &= ~FT_GLYPH_OWN_BITMAP;
    
    185 199
         }
    
    186 200
     
    
    187
    -    if ( x_shift || y_shift )
    
    188
    -      FT_Outline_Translate( outline, -x_shift, -y_shift );
    
    201
    +    // if ( x_shift || y_shift )
    
    202
    +    //   FT_Outline_Translate( outline, -x_shift, -y_shift );
    
    189 203
     
    
    190 204
         return error;
    
    191 205
       }
    

  • src/dense/rules.mk
    ... ... @@ -24,7 +24,7 @@ DENSE_COMPILE := $(CC) $(ANSIFLAGS) \
    24 24
                             $I$(subst /,$(COMPILER_SEP),$(DENSE_DIR)) \
    
    25 25
                             $(INCLUDE_FLAGS)                          \
    
    26 26
                             $(FT_CFLAGS)                              \
    
    27
    -                        "-msse4.1"
    
    27
    +                        "-march=native"
    
    28 28
     
    
    29 29
     # DENSE driver sources (i.e., C files)
    
    30 30
     #
    


  • reply via email to

    [Prev in Thread] Current Thread [Next in Thread]