Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stable #7

Open
wants to merge 5 commits into
base: stable
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion doc/reST/cli.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

*********************
Command Line Options
*********************
Expand Down Expand Up @@ -1261,6 +1262,18 @@ Temporal / motion search options
Enable motion estimation with source frame pixels. In this mode,
motion estimation can be computed independently. Default disabled.

.. option:: --hme, --no-hme

Enable 3-level hierarchical motion estimation (HME) at one-sixteenth,
quarter and full resolution. Default disabled.

.. option:: --hme-search <integer|string>,<integer|string>,<integer|string>

Motion search method for HME levels 0, 1 and 2. Refer to :option:`--me` for values.
Specify a search method for each level. Alternatively, specify a single value,
which will apply to all levels. Default is hex,umh,umh for
levels 0, 1 and 2 respectively.

Spatial/intra options
=====================

Expand Down Expand Up @@ -1633,7 +1646,7 @@ Quality, rate control and rate distortion options
ignored. Slower presets will generally achieve better compression
efficiency (and generate smaller bitstreams). Default disabled.

.. option:: --aq-mode <0|1|2|3>
.. option:: --aq-mode <0|1|2|3|4>

Adaptive Quantization operating mode. Raise or lower per-block
quantization based on complexity analysis of the source image. The
Expand All @@ -1647,6 +1660,7 @@ Quality, rate control and rate distortion options
3. AQ enabled with auto-variance and bias to dark scenes. This is
recommended for 8-bit encodes or low-bitrate 10-bit encodes, to
prevent color banding/blocking.
4. AQ enabled with auto-variance and edge information.

.. option:: --aq-strength <float>

Expand Down
2 changes: 1 addition & 1 deletion source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ option(NATIVE_BUILD "Target the build CPU" OFF)
option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
# X265_BUILD must be incremented each time the public API is changed
set(X265_BUILD 176)
set(X265_BUILD 178)
configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
"${PROJECT_BINARY_DIR}/x265.def")
configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
Expand Down
56 changes: 56 additions & 0 deletions source/common/lowres.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize)
heightFullRes = origPic->m_picHeight;
width = origPic->m_picWidth / 2;
lines = origPic->m_picHeight / 2;
bEnableHME = param->bEnableHME ? 1 : 0;
lumaStride = width + 2 * origPic->m_lumaMarginX;
if (lumaStride & 31)
lumaStride += 32 - (lumaStride & 31);
Expand All @@ -64,6 +65,7 @@ bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize)
maxBlocksInColFullRes = maxBlocksInCol * 2;
int cuCount = maxBlocksInRow * maxBlocksInCol;
int cuCountFullRes = (qgSize > 8) ? cuCount : cuCount << 2;
isHMELowres = param->bEnableHME ? 1 : 0;

/* rounding the width to multiple of lowres CU size */
width = maxBlocksInRow * X265_LOWRES_CU_SIZE;
Expand All @@ -78,6 +80,7 @@ bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize)
CHECKED_MALLOC_ZERO(qpCuTreeOffset, double, cuCountFullRes);
if (qgSize == 8)
CHECKED_MALLOC_ZERO(invQscaleFactor8x8, int, cuCount);
CHECKED_MALLOC_ZERO(edgeInclined, int, cuCountFullRes);
}

if (origPic->m_param->bAQMotion)
Expand Down Expand Up @@ -137,6 +140,26 @@ bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize)
lowresPlane[2] = buffer[2] + padoffset;
lowresPlane[3] = buffer[3] + padoffset;

if (bEnableHME)
{
intptr_t lumaStrideHalf = lumaStride / 2;
if (lumaStrideHalf & 31)
lumaStrideHalf += 32 - (lumaStrideHalf & 31);
size_t planesizeHalf = planesize / 2;
size_t padoffsetHalf = padoffset / 2;
/* allocate lower-res buffers */
CHECKED_MALLOC_ZERO(lowerResBuffer[0], pixel, 4 * planesizeHalf);

lowerResBuffer[1] = lowerResBuffer[0] + planesizeHalf;
lowerResBuffer[2] = lowerResBuffer[1] + planesizeHalf;
lowerResBuffer[3] = lowerResBuffer[2] + planesizeHalf;

lowerResPlane[0] = lowerResBuffer[0] + padoffsetHalf;
lowerResPlane[1] = lowerResBuffer[1] + padoffsetHalf;
lowerResPlane[2] = lowerResBuffer[2] + padoffsetHalf;
lowerResPlane[3] = lowerResBuffer[3] + padoffsetHalf;
}

CHECKED_MALLOC(intraCost, int32_t, cuCount);
CHECKED_MALLOC(intraMode, uint8_t, cuCount);

Expand All @@ -155,6 +178,16 @@ bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize)
CHECKED_MALLOC(lowresMvs[1][i], MV, cuCount);
CHECKED_MALLOC(lowresMvCosts[0][i], int32_t, cuCount);
CHECKED_MALLOC(lowresMvCosts[1][i], int32_t, cuCount);
if (bEnableHME)
{
int maxBlocksInRowLowerRes = ((width/2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
int maxBlocksInColLowerRes = ((lines/2) + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
int cuCountLowerRes = maxBlocksInRowLowerRes * maxBlocksInColLowerRes;
CHECKED_MALLOC(lowerResMvs[0][i], MV, cuCountLowerRes);
CHECKED_MALLOC(lowerResMvs[1][i], MV, cuCountLowerRes);
CHECKED_MALLOC(lowerResMvCosts[0][i], int32_t, cuCountLowerRes);
CHECKED_MALLOC(lowerResMvCosts[1][i], int32_t, cuCountLowerRes);
}
}

return true;
Expand All @@ -166,6 +199,8 @@ bool Lowres::create(x265_param* param, PicYuv *origPic, uint32_t qgSize)
void Lowres::destroy()
{
X265_FREE(buffer[0]);
if(bEnableHME)
X265_FREE(lowerResBuffer[0]);
X265_FREE(intraCost);
X265_FREE(intraMode);

Expand All @@ -184,12 +219,20 @@ void Lowres::destroy()
X265_FREE(lowresMvs[1][i]);
X265_FREE(lowresMvCosts[0][i]);
X265_FREE(lowresMvCosts[1][i]);
if (bEnableHME)
{
X265_FREE(lowerResMvs[0][i]);
X265_FREE(lowerResMvs[1][i]);
X265_FREE(lowerResMvCosts[0][i]);
X265_FREE(lowerResMvCosts[1][i]);
}
}
X265_FREE(qpAqOffset);
X265_FREE(invQscaleFactor);
X265_FREE(qpCuTreeOffset);
X265_FREE(propagateCost);
X265_FREE(invQscaleFactor8x8);
X265_FREE(edgeInclined);
X265_FREE(qpAqMotionOffset);
X265_FREE(blockVariance);
if (maxAQDepth > 0)
Expand Down Expand Up @@ -253,5 +296,18 @@ void Lowres::init(PicYuv *origPic, int poc)
extendPicBorder(lowresPlane[1], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
extendPicBorder(lowresPlane[2], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);
extendPicBorder(lowresPlane[3], lumaStride, width, lines, origPic->m_lumaMarginX, origPic->m_lumaMarginY);

if (origPic->m_param->bEnableHME)
{
primitives.frameInitLowerRes(lowresPlane[0],
lowerResPlane[0], lowerResPlane[1], lowerResPlane[2], lowerResPlane[3],
lumaStride, lumaStride/2, (width / 2), (lines / 2));
extendPicBorder(lowerResPlane[0], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
extendPicBorder(lowerResPlane[1], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
extendPicBorder(lowerResPlane[2], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
extendPicBorder(lowerResPlane[3], lumaStride/2, width/2, lines/2, origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2);
fpelLowerResPlane[0] = lowerResPlane[0];
}

fpelPlane[0] = lowresPlane[0];
}
49 changes: 37 additions & 12 deletions source/common/lowres.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,13 @@ struct ReferencePlanes
pixel* lowresPlane[4];
PicYuv* reconPic;

/* 1/16th resolution : Level-0 HME planes */
pixel* fpelLowerResPlane[3];
pixel* lowerResPlane[4];

bool isWeighted;
bool isLowres;
bool isHMELowres;

intptr_t lumaStride;
intptr_t chromaStride;
Expand All @@ -59,46 +64,58 @@ struct ReferencePlanes

/* lowres motion compensation, you must provide a buffer and stride for QPEL averaged pixels
 * in case QPEL is required. Else it returns a pointer to the HPEL pixels
 *
 * blockOffset - offset added to the selected plane pointer to reach the block
 * qmv         - motion vector; bit 0 of either component selects the averaging (QPEL) path,
 *               bit 1 of each component selects one of the four planes, and the remaining
 *               bits (>> 2) give the whole-pixel displacement
 * buf         - destination for the averaged block; written only on the QPEL path
 * outstride   - on the QPEL path it is read as the stride of buf; on the HPEL path it is
 *               overwritten with the stride of the returned plane
 * hme         - when true, lowerResPlane[] and lumaStride / 2 are used in place of
 *               lowresPlane[] and lumaStride
 *
 * NOTE(review): this span is a rendered diff without +/- markers, so several statements
 * appear twice in a row: first the variant using lowresPlane / lumaStride, then the variant
 * using plane / YStride. Only one of each pair belongs in the final source. */
inline pixel *lowresMC(intptr_t blockOffset, const MV& qmv, pixel *buf, intptr_t& outstride)
inline pixel *lowresMC(intptr_t blockOffset, const MV& qmv, pixel *buf, intptr_t& outstride, bool hme)
{
// Stride of the plane set chosen by hme. Lowres::init fills the lowerRes planes with
// lumaStride / 2, so the same value has to be used when reading them back here.
intptr_t YStride = hme ? lumaStride / 2 : lumaStride;
// Local view of the four planes to read from, so the code below is independent of hme.
pixel *plane[4];
for (int i = 0; i < 4; i++)
{
plane[i] = hme ? lowerResPlane[i] : lowresPlane[i];
}
// Any odd component means the vector does not land exactly on one of the four planes.
if ((qmv.x | qmv.y) & 1)
{
// Plane index in 0..3: bit 1 of y becomes index bit 1, bit 1 of x becomes index bit 0.
int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
pixel *frefA = plane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * YStride;
// Second sample: bump each odd component up to the next even value and look it up
// the same way as the first sample.
int qmvx = qmv.x + (qmv.x & 1);
int qmvy = qmv.y + (qmv.y & 1);
int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
// Combine the two samples into buf. The table index is 1 only when both strides are
// multiples of 64 (presumably the aligned variant; lowresQPelCost spells its index
// NONALIGNED). The trailing 32 is presumably the blend weight - TODO confirm against
// the pixelavg_pp primitive.
primitives.pu[LUMA_8x8].pixelavg_pp[(outstride % 64 == 0) && (lumaStride % 64 == 0)](buf, outstride, frefA, lumaStride, frefB, lumaStride, 32);
pixel *frefB = plane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * YStride;
primitives.pu[LUMA_8x8].pixelavg_pp[(outstride % 64 == 0) && (YStride % 64 == 0)](buf, outstride, frefA, YStride, frefB, YStride, 32);
return buf;
}
else
{
// No averaging needed: hand back a pointer straight into the plane and report the
// plane's stride to the caller. buf is left untouched on this path.
outstride = lumaStride;
outstride = YStride;
int hpel = (qmv.y & 2) | ((qmv.x & 2) >> 1);
return lowresPlane[hpel] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
return plane[hpel] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * YStride;
}
}

/* Cost of the block at blockOffset displaced by qmv, measured against fenc with comp.
 *
 * fenc        - block to compare against; passed to comp with stride FENC_STRIDE
 * blockOffset - offset added to the selected plane pointer to reach the block
 * qmv         - motion vector, decoded as follows: bit 0 of either component selects the
 *               averaging path, bit 1 of each component selects one of the four planes,
 *               and the remaining bits (>> 2) give the whole-pixel displacement
 * comp        - comparison function; its return value is returned unchanged
 * hme         - when true, lowerResPlane[] and lumaStride / 2 are used in place of
 *               lowresPlane[] and lumaStride
 *
 * NOTE(review): this span is a rendered diff without +/- markers, so several statements
 * appear twice in a row: first the variant using lowresPlane / lumaStride, then the variant
 * using plane / YStride. Only one of each pair belongs in the final source. */
inline int lowresQPelCost(pixel *fenc, intptr_t blockOffset, const MV& qmv, pixelcmp_t comp)
inline int lowresQPelCost(pixel *fenc, intptr_t blockOffset, const MV& qmv, pixelcmp_t comp, bool hme)
{
// Stride of the plane set chosen by hme.
intptr_t YStride = hme ? lumaStride / 2 : lumaStride;
// Local view of the four planes to read from, so the code below is independent of hme.
pixel *plane[4];
for (int i = 0; i < 4; i++)
{
plane[i] = hme ? lowerResPlane[i] : lowresPlane[i];
}
// Any odd component means two plane samples have to be averaged before comparing.
if ((qmv.x | qmv.y) & 1)
{
// Scratch block for the averaged pixels: 8x8 with a stride of 8.
ALIGN_VAR_16(pixel, subpelbuf[8 * 8]);
// Plane index in 0..3: bit 1 of y becomes index bit 1, bit 1 of x becomes index bit 0.
int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
pixel *frefA = lowresPlane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
pixel *frefA = plane[hpelA] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * YStride;
// Second sample: bump each odd component up to the next even value and look it up
// the same way as the first sample.
int qmvx = qmv.x + (qmv.x & 1);
int qmvy = qmv.y + (qmv.y & 1);
int hpelB = (qmvy & 2) | ((qmvx & 2) >> 1);
pixel *frefB = lowresPlane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * lumaStride;
// Combine the two samples into subpelbuf. The trailing 32 is presumably the blend
// weight - TODO confirm against the pixelavg_pp primitive.
primitives.pu[LUMA_8x8].pixelavg_pp[NONALIGNED](subpelbuf, 8, frefA, lumaStride, frefB, lumaStride, 32);
pixel *frefB = plane[hpelB] + blockOffset + (qmvx >> 2) + (qmvy >> 2) * YStride;
primitives.pu[LUMA_8x8].pixelavg_pp[NONALIGNED](subpelbuf, 8, frefA, YStride, frefB, YStride, 32);
return comp(fenc, FENC_STRIDE, subpelbuf, 8);
}
else
{
// The vector lands exactly on one plane: compare against it in place, no copy.
int hpel = (qmv.y & 2) | ((qmv.x & 2) >> 1);
pixel *fref = lowresPlane[hpel] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * lumaStride;
return comp(fenc, FENC_STRIDE, fref, lumaStride);
pixel *fref = plane[hpel] + blockOffset + (qmv.x >> 2) + (qmv.y >> 2) * YStride;
return comp(fenc, FENC_STRIDE, fref, YStride);
}
}
};
Expand Down Expand Up @@ -150,6 +167,7 @@ struct PicQPAdaptationLayer
struct Lowres : public ReferencePlanes
{
pixel *buffer[4];
pixel *lowerResBuffer[4]; // Level-0 buffer

int frameNum; // Presentation frame number
int sliceType; // Slice type decided by lookahead
Expand Down Expand Up @@ -181,6 +199,11 @@ struct Lowres : public ReferencePlanes
uint32_t maxBlocksInRowFullRes;
uint32_t maxBlocksInColFullRes;

/* Hierarchical Motion Estimation */
bool bEnableHME;
int32_t* lowerResMvCosts[2][X265_BFRAME_MAX + 2];
MV* lowerResMvs[2][X265_BFRAME_MAX + 2];

/* used for vbvLookahead */
int plannedType[X265_LOOKAHEAD_MAX + 1];
int64_t plannedSatd[X265_LOOKAHEAD_MAX + 1];
Expand All @@ -197,6 +220,8 @@ struct Lowres : public ReferencePlanes
uint64_t wp_ssd[3]; // This is different than SSDY, this is sum(pixel^2) - sum(pixel)^2 for entire frame
uint64_t wp_sum[3];
double frameVariance;
int* edgeInclined;


/* cutree intermediate data */
PicQPAdaptationLayer* pAQLayer;
Expand Down
44 changes: 41 additions & 3 deletions source/common/param.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,9 @@ void x265_param_default(x265_param* param)
param->bEnableTSkipFast = 0;
param->maxNumReferences = 3;
param->bEnableTemporalMvp = 1;
param->bEnableHME = 0;
param->hmeSearchMethod[0] = X265_HEX_SEARCH;
param->hmeSearchMethod[1] = param->hmeSearchMethod[2] = X265_UMH_SEARCH;
param->bSourceReferenceEstimation = 0;
param->limitTU = 0;
param->dynamicRd = 0;
Expand Down Expand Up @@ -1282,6 +1285,27 @@ int x265_param_parse(x265_param* p, const char* name, const char* value)
OPT("fades") p->bEnableFades = atobool(value);
OPT("field") p->bField = atobool( value );
OPT("cll") p->bEmitCLL = atobool(value);
OPT("hme") p->bEnableHME = atobool(value);
OPT("hme-search")
{
char search[3][5];
memset(search, '\0', 15 * sizeof(char));
if(3 == sscanf(value, "%d,%d,%d", &p->hmeSearchMethod[0], &p->hmeSearchMethod[1], &p->hmeSearchMethod[2]) ||
3 == sscanf(value, "%4[^,],%4[^,],%4[^,]", search[0], search[1], search[2]))
{
if(search[0][0])
for(int level = 0; level < 3; level++)
p->hmeSearchMethod[level] = parseName(search[level], x265_motion_est_names, bError);
}
else if (sscanf(value, "%d", &p->hmeSearchMethod[0]) || sscanf(value, "%s", search[0]))
{
if (search[0][0]) {
p->hmeSearchMethod[0] = parseName(search[0], x265_motion_est_names, bError);
p->hmeSearchMethod[1] = p->hmeSearchMethod[2] = p->hmeSearchMethod[0];
}
}
p->bEnableHME = true;
}
else
return X265_PARAM_BAD_NAME;
}
Expand Down Expand Up @@ -1522,7 +1546,7 @@ int x265_check_params(x265_param* param)
"Lookahead depth must be less than 256");
CHECK(param->lookaheadSlices > 16 || param->lookaheadSlices < 0,
"Lookahead slices must between 0 and 16");
CHECK(param->rc.aqMode < X265_AQ_NONE || X265_AQ_AUTO_VARIANCE_BIASED < param->rc.aqMode,
CHECK(param->rc.aqMode < X265_AQ_NONE || X265_AQ_EDGE < param->rc.aqMode,
"Aq-Mode is out of range");
CHECK(param->rc.aqStrength < 0 || param->rc.aqStrength > 3,
"Aq-Strength is out of range");
Expand Down Expand Up @@ -1732,8 +1756,13 @@ void x265_print_params(x265_param* param)
x265_log(param, X265_LOG_INFO, "Residual QT: max TU size, max depth : %d / %d inter / %d intra\n",
param->maxTUSize, param->tuQTMaxInterDepth, param->tuQTMaxIntraDepth);

x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge : %s / %d / %d / %d\n",
x265_motion_est_names[param->searchMethod], param->searchRange, param->subpelRefine, param->maxNumMergeCand);
if (param->bEnableHME)
x265_log(param, X265_LOG_INFO, "HME L0,1,2 / range / subpel / merge : %s, %s, %s / %d / %d / %d\n",
x265_motion_est_names[param->hmeSearchMethod[0]], x265_motion_est_names[param->hmeSearchMethod[1]], x265_motion_est_names[param->hmeSearchMethod[2]], param->searchRange, param->subpelRefine, param->maxNumMergeCand);
else
x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge : %s / %d / %d / %d\n",
x265_motion_est_names[param->searchMethod], param->searchRange, param->subpelRefine, param->maxNumMergeCand);

if (param->keyframeMax != INT_MAX || param->scenecutThreshold)
x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut / bias: %d / %d / %d / %.2lf\n", param->keyframeMin, param->keyframeMax, param->scenecutThreshold, param->scenecutBias * 100);
else
Expand Down Expand Up @@ -1928,6 +1957,9 @@ char *x265_param2string(x265_param* p, int padx, int pady)
s += sprintf(s, " subme=%d", p->subpelRefine);
s += sprintf(s, " merange=%d", p->searchRange);
BOOL(p->bEnableTemporalMvp, "temporal-mvp");
BOOL(p->bEnableHME, "hme");
if (p->bEnableHME)
s += sprintf(s, " Level 0,1,2=%d,%d,%d", p->hmeSearchMethod[0], p->hmeSearchMethod[1], p->hmeSearchMethod[2]);
BOOL(p->bEnableWeightedPred, "weightp");
BOOL(p->bEnableWeightedBiPred, "weightb");
BOOL(p->bSourceReferenceEstimation, "analyze-src-pics");
Expand Down Expand Up @@ -2215,6 +2247,12 @@ void x265_copy_params(x265_param* dst, x265_param* src)
dst->subpelRefine = src->subpelRefine;
dst->searchRange = src->searchRange;
dst->bEnableTemporalMvp = src->bEnableTemporalMvp;
dst->bEnableHME = src->bEnableHME;
if (src->bEnableHME)
{
for (int level = 0; level < 3; level++)
dst->hmeSearchMethod[level] = src->hmeSearchMethod[level];
}
dst->bEnableWeightedBiPred = src->bEnableWeightedBiPred;
dst->bEnableWeightedPred = src->bEnableWeightedPred;
dst->bSourceReferenceEstimation = src->bSourceReferenceEstimation;
Expand Down
1 change: 1 addition & 0 deletions source/common/pixel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1309,6 +1309,7 @@ void setupPixelPrimitives_c(EncoderPrimitives &p)
p.scale1D_128to64[NONALIGNED] = p.scale1D_128to64[ALIGNED] = scale1D_128to64;
p.scale2D_64to32 = scale2D_64to32;
p.frameInitLowres = frame_init_lowres_core;
p.frameInitLowerRes = frame_init_lowres_core;
p.ssim_4x4x2_core = ssim_4x4x2_core;
p.ssim_end_4 = ssim_end_4;

Expand Down
1 change: 1 addition & 0 deletions source/common/primitives.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ struct EncoderPrimitives
saoCuStatsE3_t saoCuStatsE3;

downscale_t frameInitLowres;
downscale_t frameInitLowerRes;
cutree_propagate_cost propagateCost;
cutree_fix8_unpack fix8Unpack;
cutree_fix8_pack fix8Pack;
Expand Down
Loading